# Shared imports used by the three pipeline variants below (assumed: the original
# notebook exports would define these in earlier cells not shown here).
import os
import time

import numpy as np
import pandas as pd
import scipy.linalg
import sklearn.metrics
import sklearn.neighbors
import sklearn.svm
import torch
import torch.nn as nn
from sklearn import metrics, svm
from torch.utils.data import TensorDataset

import bda_utils


def main(rs, det):
    bda_utils.setup_seed(rs)

    # # 1. BDA Part

    # ## 1.a. Define BDA methodology

    # In[53]:

    def kernel(ker, X1, X2, gamma):
        K = None
        if not ker or ker == 'primal':
            K = X1
        elif ker == 'linear':
            if X2 is not None:
                K = sklearn.metrics.pairwise.linear_kernel(
                    np.asarray(X1).T, np.asarray(X2).T)
            else:
                K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
        elif ker == 'rbf':
            if X2 is not None:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, np.asarray(X2).T, gamma)
            else:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, None, gamma)
        return K

    def proxy_a_distance(source_X, target_X):
        """
        Compute the Proxy-A-Distance of a source/target representation
        """
        nb_source = np.shape(source_X)[0]
        nb_target = np.shape(target_X)[0]

        train_X = np.vstack((source_X, target_X))
        train_Y = np.hstack(
            (np.zeros(nb_source, dtype=int), np.ones(nb_target, dtype=int)))

        clf = svm.LinearSVC(random_state=0)
        clf.fit(train_X, train_Y)
        y_pred = clf.predict(train_X)
        error = metrics.mean_absolute_error(train_Y, y_pred)
        dist = 2 * (1 - 2 * error)
        return dist

    def estimate_mu(_X1, _Y1, _X2, _Y2):
        adist_m = proxy_a_distance(_X1, _X2)
        C = len(np.unique(_Y1))
        epsilon = 1e-3
        list_adist_c = []
        for i in range(1, C + 1):
            ind_i, ind_j = np.where(_Y1 == i), np.where(_Y2 == i)
            Xsi = _X1[ind_i[0], :]
            Xtj = _X2[ind_j[0], :]
            adist_i = proxy_a_distance(Xsi, Xtj)
            list_adist_c.append(adist_i)
        adist_c = sum(list_adist_c) / C
        mu = adist_c / (adist_c + adist_m)
        if mu > 1:
            mu = 1
        if mu < epsilon:
            mu = 0
        return mu
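    # A minimal commented sketch (synthetic arrays, hypothetical shapes) of how the two
    # helpers above relate: proxy_a_distance is near 0 when a linear SVM cannot tell
    # source from target apart and approaches 2 when it separates them perfectly, and
    # estimate_mu balances that marginal distance against the per-class distances.
    #
    #   src = np.random.rand(100, 8)              # 100 source samples, 8 features
    #   tgt = np.random.rand(120, 8) + 0.5        # shifted target domain
    #   print(proxy_a_distance(src, tgt))         # value in [-2, 2]; larger = more separable
    #   ys = np.random.randint(1, 4, (100, 1))    # class labels 1..3, column vectors
    #   yt = np.random.randint(1, 4, (120, 1))
    #   print(estimate_mu(src, ys, tgt, yt))      # mu in [0, 1]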
    # In[54]:

    class BDA:
        def __init__(self,
                     kernel_type='primal',
                     dim=30,
                     lamb=1,
                     mu=0.5,
                     gamma=1,
                     T=10,
                     mode='BDA',
                     estimate_mu=False):
            '''
            Init func
            :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
            :param dim: dimension after transfer
            :param lamb: lambda value in equation
            :param mu: mu; if not specified, it is estimated using the A-distance
            :param gamma: kernel bandwidth for rbf kernel
            :param T: iteration number
            :param mode: 'BDA' | 'WBDA'
            :param estimate_mu: True | False, whether to estimate mu automatically
                instead of setting it manually
            '''
            self.kernel_type = kernel_type
            self.dim = dim
            self.lamb = lamb
            self.mu = mu
            self.gamma = gamma
            self.T = T
            self.mode = mode
            self.estimate_mu = estimate_mu

        def fit(self, Xs, Ys, Xt, Yt):
            '''
            Transform the data and predict pseudo-labels (here via a linear SVC;
            the JDA paper used 1NN)
            :param Xs: ns * n_feature, source feature
            :param Ys: ns * 1, source label
            :param Xt: nt * n_feature, target feature
            :param Yt: nt * 1, target label
            :return: Xs_new, Xt_new, A
            '''
            # ipdb.set_trace()
            list_acc = []
            X = np.hstack((Xs.T, Xt.T))  # X.shape: [n_feature, ns+nt]
            # axis=0 takes the L2 norm of each column (i.e. each sample), not a
            # mean, despite the variable name; each sample is scaled to unit norm.
            X_mean = np.linalg.norm(X, axis=0)
            X_mean[X_mean == 0] = 1
            X /= X_mean
            m, n = X.shape
            ns, nt = len(Xs), len(Xt)
            e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones(
                (nt, 1))))
            C = np.unique(Ys)
            H = np.eye(n) - 1 / n * np.ones((n, n))
            mu = self.mu
            M = 0
            Y_tar_pseudo = None
            Xs_new = None
            for t in range(self.T):
                # print('\tStarting iter %i' % t)
                N = 0
                M0 = e * e.T * len(C)
                # ipdb.set_trace()
                if Y_tar_pseudo is not None:
                    for i in range(len(C)):
                        e = np.zeros((n, 1))
                        Ns = len(Ys[np.where(Ys == C[i])])
                        Nt = len(Y_tar_pseudo[np.where(Y_tar_pseudo == C[i])])
                        if self.mode == 'WBDA':
                            Ps = Ns / len(Ys)
                            Pt = Nt / len(Y_tar_pseudo)
                            alpha = Pt / Ps
                            # mu = 1
                        else:
                            alpha = 1
                        tt = Ys == C[i]
                        e[np.where(tt == True)] = 1 / Ns
                        # ipdb.set_trace()
                        yy = Y_tar_pseudo == C[i]
                        ind = np.where(yy == True)
                        inds = [item + ns for item in ind]
                        try:
                            e[tuple(inds)] = -alpha / Nt
                            e[np.isinf(e)] = 0
                        except Exception:
                            e[tuple(inds)] = 0  # ?
                        N = N + np.dot(e, e.T)
                # ipdb.set_trace()
                # In BDA, mu can be set or automatically estimated using A-distance.
                # In WBDA, we find that setting mu=1 is enough.
                if self.estimate_mu and self.mode == 'BDA':
                    if Xs_new is not None:
                        mu = estimate_mu(Xs_new, Ys, Xt_new, Y_tar_pseudo)
                    else:
                        mu = 0
                # ipdb.set_trace()
                M = (1 - mu) * M0 + mu * N
                M /= np.linalg.norm(M, 'fro')
                # ipdb.set_trace()
                K = kernel(self.kernel_type, X, None, gamma=self.gamma)
                n_eye = m if self.kernel_type == 'primal' else n
                a, b = np.linalg.multi_dot([
                    K, M, K.T
                ]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot(
                    [K, H, K.T])
                w, V = scipy.linalg.eig(a, b)
                ind = np.argsort(w)
                A = V[:, ind[:self.dim]]
                Z = np.dot(A.T, K)
                # As above: per-sample L2 normalization of the projected data.
                Z_mean = np.linalg.norm(Z, axis=0)
                Z_mean[Z_mean == 0] = 1
                Z /= Z_mean
                Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T

                global device  # unused; leftover from a module-level device setup
                model = sklearn.svm.SVC(kernel='linear').fit(
                    Xs_new, Ys.ravel())
                Y_tar_pseudo = model.predict(Xt_new)
                # ipdb.set_trace()
                # Named acc for historical reasons; this is the MSE between the
                # pseudo-labels and the true classes (Yt is already in classes).
                acc = sklearn.metrics.mean_squared_error(Y_tar_pseudo, Yt)
                # print(acc)
            return Xs_new, Xt_new, A  # , acc, Y_tar_pseudo, list_acc
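    # What fit() solves each iteration: with the notation above, the projection A is
    # taken from the smallest generalized eigenvectors of
    #   $(K M K^T + \lambda I) A = (K H K^T) A \Phi$,
    # minimizing the (class-weighted) distribution divergence encoded in M while
    # preserving variance via the centering matrix H. A standalone sketch of that one
    # step (synthetic inputs, hypothetical sizes):
    #   n = 20
    #   K = np.random.rand(n, n)
    #   M = np.eye(n)                              # stand-in for the divergence matrix
    #   H = np.eye(n) - np.ones((n, n)) / n
    #   a = np.linalg.multi_dot([K, M, K.T]) + 1.0 * np.eye(n)
    #   b = np.linalg.multi_dot([K, H, K.T])
    #   w, V = scipy.linalg.eig(a, b)
    #   A = V[:, np.argsort(w)[:5]]                # keep the 5 smallest-eigenvalue directions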
    # ## 1.b. Load Data

    # In[55]:

    Xs, Xt = bda_utils.load_data(if_weekday=1, if_interdet=1)
    Xs = Xs[:, det:det + 1]
    Xt = Xt[:, det:det + 1]

    Xs, Xs_min, Xs_max = bda_utils.normalize2D(Xs)
    Xt, Xt_min, Xt_max = bda_utils.normalize2D(Xt)

    # In[56]:

    # for i in range(Xs.shape[1]):
    #     plt.figure(figsize=[20, 4])
    #     plt.plot(Xs[:, i])
    #     plt.plot(Xt[:, i])

    # ## 1.d. Hyperparameters

    # In[57]:

    label_seq_len = 7
    # batch_size = full batch
    seq_len = 12
    reduced_dim = 4
    inp_dim = min(Xs.shape[1], Xt.shape[1])
    label_dim = min(Xs.shape[1], Xt.shape[1])
    hid_dim = 12
    layers = 1
    lamb = 2
    MU = 0.7
    bda_dim = label_seq_len - 4
    kernel_type = 'linear'

    hyper = {
        'inp_dim': inp_dim,
        'label_dim': label_dim,
        'label_seq_len': label_seq_len,
        'seq_len': seq_len,
        'reduced_dim': reduced_dim,
        'hid_dim': hid_dim,
        'layers': layers,
        'lamb': lamb,
        'MU': MU,
        'bda_dim': bda_dim,
        'kernel_type': kernel_type
    }
    hyper = pd.DataFrame(hyper, index=['Values'])

    # In[58]:

    hyper

    # ## 1.e. Apply BDA and get $Xs_{new}$, $Xt_{new}$

    # In[59]:

    Xs = Xs[:96, :]  # keep the first 96 source samples (one day, per the 96-per-day split below)

    # In[60]:

    # [sample size, seq_len, inp_dim (dets)], [sample size, label_seq_len, inp_dim (dets)]
    Xs_3d, Ys_3d = bda_utils.sliding_window(Xs, Xs, seq_len, label_seq_len)
    Xt_3d, Yt_3d = bda_utils.sliding_window(Xt, Xt, seq_len, label_seq_len)
    Ys_3d = Ys_3d[:, label_seq_len - 1:, :]
    Yt_3d = Yt_3d[:, label_seq_len - 1:, :]

    # print(Xs_3d.shape)
    # print(Ys_3d.shape)
    # print(Xt_3d.shape)
    # print(Yt_3d.shape)
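    # A toy illustration (hypothetical; bda_utils.sliding_window itself is not shown) of
    # what sliding a window of length 3 over a 6-step, single-detector series produces:
    #   x = np.arange(6).reshape(-1, 1)
    #   wins = np.stack([x[j:j + 3, 0] for j in range(len(x) - 3 + 1)])
    #   # wins -> [[0,1,2],[1,2,3],[2,3,4],[3,4,5]], i.e. [n_samples, seq_len]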
    # In[64]:

    t_s = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    Xs_train_3d = []
    Ys_train_3d = []
    Xt_valid_3d = []
    Xt_train_3d = []
    Yt_valid_3d = []
    Yt_train_3d = []
    for i in range(Xs_3d.shape[2]):
        # print('Starting det %i' % i)
        bda = BDA(kernel_type='linear',
                  dim=seq_len - reduced_dim,
                  lamb=lamb,
                  mu=MU,
                  gamma=1,
                  T=2)  # T is the iteration count
        # input shape: ns, n_feature | ns, n_label_feature
        Xs_new, Xt_new, A = bda.fit(
            Xs_3d[:, :, i], bda_utils.get_class(Ys_3d[:, :, i]),
            Xt_3d[:, :, i], bda_utils.get_class(Yt_3d[:, :, i]))

        # normalize
        Xs_new, Xs_new_min, Xs_new_max = bda_utils.normalize2D(Xs_new)
        Xt_new, Xt_new_min, Xt_new_max = bda_utils.normalize2D(Xt_new)

        # print(Xs_new.shape)
        # print(Xt_new.shape)

        day_train_t = 1
        Xs_train = Xs_new.copy()
        Ys_train = Ys_3d[:, :, i]
        Xt_valid = Xt_new.copy()[int(96 * day_train_t):, :]
        Xt_train = Xt_new.copy()[:int(96 * day_train_t), :]
        Yt_valid = Yt_3d[:, :, i].copy()[int(96 * day_train_t):, :]
        Yt_train = Yt_3d[:, :, i].copy()[:int(96 * day_train_t), :]

        Xs_train_3d.append(Xs_train)
        Ys_train_3d.append(Ys_train)
        Xt_valid_3d.append(Xt_valid)
        Xt_train_3d.append(Xt_train)
        Yt_valid_3d.append(Yt_valid)
        Yt_train_3d.append(Yt_train)

    Xs_train_3d = np.array(Xs_train_3d)
    Ys_train_3d = np.array(Ys_train_3d)
    Xt_valid_3d = np.array(Xt_valid_3d)
    Xt_train_3d = np.array(Xt_train_3d)
    Yt_valid_3d = np.array(Yt_valid_3d)
    Yt_train_3d = np.array(Yt_train_3d)

    # bda_utils.save_np(Xs_train_3d, './outputs/BDA/Xs_new_%i.csv' % (bda_utils.get_num() - 14 / 6))
    # bda_utils.save_np(Ys_train_3d, './outputs/BDA/Xt_new_%i.csv' % (bda_utils.get_num() - 14 / 6))
    # bda_utils.save_np(Xt_valid_3d, './outputs/BDA/Xs_new_%i.csv' % (bda_utils.get_num() - 14 / 6))
    # bda_utils.save_np(Xt_train_3d, './outputs/BDA/Xt_new_%i.csv' % (bda_utils.get_num() - 14 / 6))
    # bda_utils.save_np(Yt_valid_3d, './outputs/BDA/Xs_new_%i.csv' % (bda_utils.get_num() - 14 / 6))
    # bda_utils.save_np(Yt_train_3d, './outputs/BDA/Xt_new_%i.csv' % (bda_utils.get_num() - 14 / 6))

    # print('Time spent:%.5f' % (time.time() - t_s))

    # In[65]:

    Xs_train_3d = np.transpose(Xs_train_3d, (1, 2, 0))
    Ys_train_3d = np.transpose(Ys_train_3d, (1, 2, 0))
    Xt_valid_3d = np.transpose(Xt_valid_3d, (1, 2, 0))
    Xt_train_3d = np.transpose(Xt_train_3d, (1, 2, 0))
    Yt_valid_3d = np.transpose(Yt_valid_3d, (1, 2, 0))
    Yt_train_3d = np.transpose(Yt_train_3d, (1, 2, 0))

    # In[66]:

    Ys_train_3d.shape

    # # 2. Learning Part

    # ## 2.a. Build network

    # In[67]:

    from bda_utils import traff_net_reg

    # ## 2.b. Assemble Dataloader

    # In[68]:

    batch_size = 1960

    train_x = np.vstack([Xs_train_3d, Xt_train_3d])
    train_y = np.vstack([Ys_train_3d, Yt_train_3d])
    train_x = torch.tensor(train_x, dtype=torch.float32).to(device)
    train_y = torch.tensor(train_y, dtype=torch.float32).to(device)
    Xt_valid_3d = torch.tensor(Xt_valid_3d, dtype=torch.float32).to(device)
    Yt_valid_3d = torch.tensor(Yt_valid_3d, dtype=torch.float32).to(device)

    train_dataset = TensorDataset(train_x, train_y)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size,
                                               shuffle=False)
    train_iter = iter(train_loader)

    # print(train_x.shape)
    # print(train_y.shape)
    # print('\n')
    # print(Xt_valid_3d.shape)
    # print(Yt_valid_3d.shape)

    # ## 2.c. Learn

    # In[69]:

    # build model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = traff_net_reg(inp_dim, label_dim, seq_len - reduced_dim,
                        label_seq_len).to(device)
    criterion = nn.MSELoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 0.7)
    train_loss_set = []
    val_loss_set = []
    det = 0  # which detector to visualize (note: overrides the det argument from here on)

    num_fold = len(next(iter(os.walk('./runs/')))[1])  # count existing run folders
    os.mkdir('./runs/run%i' % (num_fold + 1))

    # In[70]:

    optimizer = torch.optim.Adam(net.parameters())

    # In[71]:

    # train
    net.train()
    epochs = 1001
    for e in range(epochs):
        for i in range(len(train_loader)):
            try:
                data, label = next(train_iter)
            except StopIteration:
                train_iter = iter(train_loader)
                data, label = next(train_iter)
            # ipdb.set_trace()
            out = net(data)
            # label.shape = [batch, 1, num_dets]
            loss = criterion(out, label[:, 0, 0].reshape(-1, 1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        val_out = net(Xt_valid_3d)
        val_loss = criterion(val_out, Yt_valid_3d[:, 0, 0].reshape(-1, 1))
        val_loss_set.append(val_loss.cpu().detach().numpy())
        train_loss_set.append(loss.cpu().detach().numpy())

        if e % 50 == 0:
            # ipdb.set_trace()
            # fig = plt.figure(figsize=[16, 4])
            # ax1 = fig.add_subplot(111)
            # ax1.plot(bda_utils.get_class(label)[:, 0, det].cpu().detach().numpy(), label='ground truth')
            # ax1.plot(100 * out.cpu().detach().numpy(), label='predict')
            # ax1.legend()
            # plt.show()
            # print('Epoch No. %i success, loss: %.5f, val loss: %.5f'
            #       % (e, loss.cpu().detach().numpy(), val_loss.cpu().detach().numpy()))

            ## SAVE BEST MODEL
            # initialize model
            best_net = traff_net_reg(inp_dim, label_dim,
                                     seq_len - reduced_dim,
                                     label_seq_len).to(device)
            # load existing parameters, or create them on the first pass
            try:
                best_net.load_state_dict(
                    torch.load('./runs/run%i/best.pth' % (num_fold + 1)))
            except Exception:
                print('Saved initial model parameters')
                bda_utils.save_model(net, 'best')
                best_net = net

            # try to calculate nrmse, or skip this round
            try:
                nrmse_best = bda_utils.nrmse_loss_func(
                    best_net(Xt_valid_3d).cpu().detach().numpy().flatten(),
                    Yt_valid_3d[:, 0, det].cpu().flatten().detach().numpy(),
                    0)
                nrmse = bda_utils.nrmse_loss_func(
                    val_out.cpu().detach().numpy().flatten(),
                    Yt_valid_3d[:, 0, det].cpu().flatten().detach().numpy(),
                    0)
            except Exception:
                print('No feasible prediction, new model parameters saved')
                bda_utils.save_model(net, 'best')
                continue

            if nrmse < nrmse_best:
                print('Updated model parameters')
                bda_utils.save_model(net, 'best')

    bda_utils.save_model(net, 'last')

    # In[43]:

    # bda_utils.save_np(Xs_new, 'Xs_new.csv')

    # In[72]:

    # fig = plt.figure(figsize=[16, 4])
    # ax1 = fig.add_subplot(121)
    # ax1.plot(train_loss_set)
    # ax1.set_xlabel('Epochs')
    # ax1.set_ylabel('MSELoss')
    # ax1.set_title('Train')
    # ax2 = fig.add_subplot(122)
    # ax2.plot(val_loss_set)
    # ax2.set_xlabel('Epochs')
    # ax2.set_ylabel('MSELoss')
    # ax2.set_title('Validation')

    # # 3. Evaluation

    # In[73]:

    best_net = traff_net_reg(inp_dim, label_dim, seq_len - reduced_dim,
                             label_seq_len).to(device)
    best_net.load_state_dict(
        torch.load('./runs/run%i/best.pth' % (num_fold + 1)))

    val_out = best_net(Xt_valid_3d)
    g_t = Yt_valid_3d[:, 0, det].cpu().flatten().detach().numpy()
    pred = val_out.cpu().detach().numpy().flatten()
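    # The two lines below undo the min-max scaling, assuming bda_utils.normalize2D
    # maps x to (x - x_min) / (x_max - x_min); the inverse is then
    #   x = x_norm * (x_max - x_min) + x_min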
    g_t = g_t * (Xt_max - Xt_min) + Xt_min
    pred = pred * (Xt_max - Xt_min) + Xt_min
    pred_ = pred.copy()
    pred_[pred_ < 0] = 0  # clip: traffic flow cannot be negative

    # plt.figure(figsize=[16, 4])
    # plt.plot(g_t, label='label')
    # plt.plot(pred_, label='predict')
    # plt.legend()

    # In[74]:

    # sklearn.metrics.accuracy_score(torch.argmax(val_out, dim=1).cpu(),
    #                                bda_utils.get_class(Yt_valid_3d[:, 0, det]).cpu().flatten())
    nrmse = bda_utils.nrmse_loss_func(pred_, g_t, 0)
    mape = bda_utils.mape_loss_func(pred_, g_t, 0)
    smape = bda_utils.smape_loss_func(pred_, g_t, 0)
    mae = bda_utils.mae_loss_func(pred_, g_t, 0)
    nmae = bda_utils.nmae_loss_func(pred, g_t, 0)  # note: uses the unclipped pred
    return nrmse, mape, smape, mae, nmae
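
# Hypothetical driver for the regression variant above (the argument names follow the
# signature: rs = random seed, det = detector index; not a documented interface):
#   nrmse, mape, smape, mae, nmae = main(rs=0, det=0)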

def main(seq_len, reduced_dim):

    def kernel(ker, X1, X2, gamma):
        K = None
        if not ker or ker == 'primal':
            K = X1
        elif ker == 'linear':
            if X2 is not None:
                K = sklearn.metrics.pairwise.linear_kernel(
                    np.asarray(X1).T, np.asarray(X2).T)
            else:
                K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
        elif ker == 'rbf':
            if X2 is not None:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, np.asarray(X2).T, gamma)
            else:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, None, gamma)
        return K

    def proxy_a_distance(source_X, target_X):
        """
        Compute the Proxy-A-Distance of a source/target representation
        """
        nb_source = np.shape(source_X)[0]
        nb_target = np.shape(target_X)[0]

        train_X = np.vstack((source_X, target_X))
        train_Y = np.hstack(
            (np.zeros(nb_source, dtype=int), np.ones(nb_target, dtype=int)))

        clf = svm.LinearSVC(random_state=0)
        clf.fit(train_X, train_Y)
        y_pred = clf.predict(train_X)
        error = metrics.mean_absolute_error(train_Y, y_pred)
        dist = 2 * (1 - 2 * error)
        return dist

    def estimate_mu(_X1, _Y1, _X2, _Y2):
        adist_m = proxy_a_distance(_X1, _X2)
        C = len(np.unique(_Y1))
        epsilon = 1e-3
        list_adist_c = []
        for i in range(1, C + 1):
            ind_i, ind_j = np.where(_Y1 == i), np.where(_Y2 == i)
            Xsi = _X1[ind_i[0], :]
            Xtj = _X2[ind_j[0], :]
            adist_i = proxy_a_distance(Xsi, Xtj)
            list_adist_c.append(adist_i)
        adist_c = sum(list_adist_c) / C
        mu = adist_c / (adist_c + adist_m)
        if mu > 1:
            mu = 1
        if mu < epsilon:
            mu = 0
        return mu

    # In[4]:

    class BDA:
        def __init__(self,
                     kernel_type='primal',
                     dim=30,
                     lamb=1,
                     mu=0.5,
                     gamma=1,
                     T=10,
                     mode='BDA',
                     estimate_mu=False):
            '''
            Init func
            :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
            :param dim: dimension after transfer
            :param lamb: lambda value in equation
            :param mu: mu; if not specified, it is estimated using the A-distance
            :param gamma: kernel bandwidth for rbf kernel
            :param T: iteration number
            :param mode: 'BDA' | 'WBDA'
            :param estimate_mu: True | False, whether to estimate mu automatically
                instead of setting it manually
            '''
            self.kernel_type = kernel_type
            self.dim = dim
            self.lamb = lamb
            self.mu = mu
            self.gamma = gamma
            self.T = T
            self.mode = mode
            self.estimate_mu = estimate_mu

        def fit(self, Xs, Ys, Xt, Yt):
            '''
            Transform the data and predict pseudo-labels (here via KNN;
            the JDA paper used 1NN)
            :param Xs: ns * n_feature, source feature
            :param Ys: ns * 1, source label
            :param Xt: nt * n_feature, target feature
            :param Yt: nt * 1, target label
            :return: Xs_new, Xt_new, A
            '''
            # ipdb.set_trace()
            list_acc = []
            X = np.hstack((Xs.T, Xt.T))  # X.shape: [n_feature, ns+nt]
            # axis=0 takes the L2 norm of each column (sample), not a mean.
            X_mean = np.linalg.norm(X, axis=0)
            X_mean[X_mean == 0] = 1
            X /= X_mean
            m, n = X.shape
            ns, nt = len(Xs), len(Xt)
            e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones(
                (nt, 1))))
            C = np.unique(Ys)
            H = np.eye(n) - 1 / n * np.ones((n, n))
            mu = self.mu
            M = 0
            Y_tar_pseudo = None
            Xs_new = None
            for t in range(self.T):
                print('\tStarting iter %i' % t)
                N = 0
                M0 = e * e.T * len(C)
                # ipdb.set_trace()
                if Y_tar_pseudo is not None:
                    for i in range(len(C)):
                        e = np.zeros((n, 1))
                        Ns = len(Ys[np.where(Ys == C[i])])
                        Nt = len(Y_tar_pseudo[np.where(Y_tar_pseudo == C[i])])
                        # Ns = 1
                        # Nt = 1
                        alpha = 1  # bda
                        tt = Ys == C[i]
                        e[np.where(tt == True)] = 1 / Ns
                        # ipdb.set_trace()
                        yy = Y_tar_pseudo == C[i]
                        ind = np.where(yy == True)
                        inds = [item + ns for item in ind]
                        try:
                            e[tuple(inds)] = -alpha / Nt
                            e[np.isinf(e)] = 0
                        except Exception:
                            e[tuple(inds)] = 0  # ?
                        N = N + np.dot(e, e.T)
                # ipdb.set_trace()
                # In BDA, mu can be set or automatically estimated using A-distance.
                # In WBDA, we find that setting mu=1 is enough.
                if self.estimate_mu and self.mode == 'BDA':
                    if Xs_new is not None:
                        mu = estimate_mu(Xs_new, Ys, Xt_new, Y_tar_pseudo)
                    else:
                        mu = 0
                # ipdb.set_trace()
                M = (1 - mu) * M0 + mu * N
                M /= np.linalg.norm(M, 'fro')
                # ipdb.set_trace()
                K = kernel(self.kernel_type, X, None, gamma=self.gamma)
                n_eye = m if self.kernel_type == 'primal' else n
                a, b = np.linalg.multi_dot([
                    K, M, K.T
                ]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot(
                    [K, H, K.T])
                w, V = scipy.linalg.eig(a, b)
                ind = np.argsort(w)
                A = V[:, ind[:self.dim]]
                Z = np.dot(A.T, K)
                Z_mean = np.linalg.norm(Z, axis=0)  # per-sample L2 norm
                Z_mean[Z_mean == 0] = 1
                Z /= Z_mean
                Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T

                global device  # unused; leftover from a module-level device setup
                model = sklearn.neighbors.KNeighborsClassifier().fit(
                    Xs_new, Ys.ravel())
                Y_tar_pseudo = model.predict(Xt_new)
                # ipdb.set_trace()
                # Yt is already in classes
                acc = sklearn.metrics.accuracy_score(Y_tar_pseudo, Yt)
                # print(acc)
            return Xs_new, Xt_new, A  # , acc, Y_tar_pseudo, list_acc
    # ## 1.b. Load Data

    # In[46]:

    Xs, Xt = bda_utils.load_data(if_weekday=1, if_interdet=1)
    Xs = Xs[:, :1]
    Xt = Xt[:, :1]

    Xs, Xs_min, Xs_max = bda_utils.normalize2D(Xs)
    Xt, Xt_min, Xt_max = bda_utils.normalize2D(Xt)

    # ## 1.d. Hyperparameters

    # In[86]:

    label_seq_len = 3
    # batch_size = full batch
    # seq_len and reduced_dim are taken from the function arguments; the original
    # cell values (seq_len = 48, reduced_dim = 15) would silently override them:
    # seq_len = 48
    # reduced_dim = 15
    inp_dim = min(Xs.shape[1], Xt.shape[1])
    label_dim = min(Xs.shape[1], Xt.shape[1])
    hid_dim = 12
    layers = 1
    lamb = 3

    hyper = {
        'inp_dim': inp_dim,
        'label_dim': label_dim,
        'label_seq_len': label_seq_len,
        'seq_len': seq_len,
        'reduced_dim': reduced_dim,
        'hid_dim': hid_dim,
        'layers': layers,
        'lamb': lamb
    }
    hyper = pd.DataFrame(hyper, index=['Values'])

    # In[87]:

    hyper
    # ## 1.e. Apply BDA and get $Xs_{new}$, $Xt_{new}$

    # In[88]:

    # [sample size, seq_len, inp_dim (dets)], [sample size, label_seq_len, inp_dim (dets)]
    Xs_3d, Ys_3d = bda_utils.sliding_window(Xs, Xs, seq_len, 1)
    Xt_3d, Yt_3d = bda_utils.sliding_window(Xt, Xt, seq_len, 1)

    # In[89]:

    t_s = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    Xs_train_3d = []
    Ys_train_3d = []
    Xt_valid_3d = []
    Xt_train_3d = []
    Yt_valid_3d = []
    Yt_train_3d = []
    for i in range(Xs_3d.shape[2]):
        bda = BDA(kernel_type='linear',
                  dim=seq_len - reduced_dim,
                  lamb=lamb,
                  mu=0.6,
                  gamma=1,
                  T=1)  # T is the iteration count
        # input shape: ns, n_feature | ns, n_label_feature
        Xs_new, Xt_new, A = bda.fit(
            Xs_3d[:, :, i], bda_utils.get_class(Ys_3d[:, :, i]),
            Xt_3d[:, :, i], bda_utils.get_class(Yt_3d[:, :, i]))

        day_train_t = 1
        Xs_train = Xs_new.copy()
        Ys_train = Ys_3d[:, :, i]
        Xt_valid = Xt_new.copy()[int(96 * day_train_t):, :]
        Xt_train = Xt_new.copy()[:int(96 * day_train_t), :]
        Yt_valid = Yt_3d[:, :, i].copy()[int(96 * day_train_t):, :]
        Yt_train = Yt_3d[:, :, i].copy()[:int(96 * day_train_t), :]

        Xs_train_3d.append(Xs_train)
        Ys_train_3d.append(Ys_train)
        Xt_valid_3d.append(Xt_valid)
        Xt_train_3d.append(Xt_train)
        Yt_valid_3d.append(Yt_valid)
        Yt_train_3d.append(Yt_train)

    Xs_train_3d = np.array(Xs_train_3d)
    Ys_train_3d = np.array(Ys_train_3d)
    Xt_valid_3d = np.array(Xt_valid_3d)
    Xt_train_3d = np.array(Xt_train_3d)
    Yt_valid_3d = np.array(Yt_valid_3d)
    Yt_train_3d = np.array(Yt_train_3d)

    # In[90]:

    Xs_train_3d = np.transpose(Xs_train_3d, (1, 2, 0))
    Ys_train_3d = np.transpose(Ys_train_3d, (1, 2, 0))
    Xt_valid_3d = np.transpose(Xt_valid_3d, (1, 2, 0))
    Xt_train_3d = np.transpose(Xt_train_3d, (1, 2, 0))
    Yt_valid_3d = np.transpose(Yt_valid_3d, (1, 2, 0))
    Yt_train_3d = np.transpose(Yt_train_3d, (1, 2, 0))

    Xs_train_3d.shape

    class traff_net(nn.Module):
        def __init__(self, seq_len, hid_dim=12, layers=3):
            super(traff_net, self).__init__()
            self.seq_len = seq_len
            self.fc = nn.Sequential(
                nn.Linear(seq_len, seq_len * 8),
                nn.ReLU(),
                nn.Linear(seq_len * 8, seq_len * 32),
                nn.ReLU(),
                nn.Linear(seq_len * 32, seq_len * 64),
                nn.ReLU(),
                nn.Linear(seq_len * 64, 100 + 1),  # 101 classes (0-100)
                nn.ReLU(),
            )  # classification head (paired with CrossEntropyLoss below)

        def forward(self, x):
            y = nn.Flatten()(x)
            y = self.fc(y)  # fully connected layers
            return y

    batch_size = 1960

    train_x = np.vstack([Xs_train_3d, Xt_train_3d])
    train_y = np.vstack([Ys_train_3d, Yt_train_3d])
    train_x = torch.tensor(train_x, dtype=torch.float32).to(device)
    train_y = torch.tensor(train_y, dtype=torch.float32).to(device)
    Xt_valid_3d = torch.tensor(Xt_valid_3d, dtype=torch.float32).to(device)
    Yt_valid_3d = torch.tensor(Yt_valid_3d, dtype=torch.float32).to(device)

    train_dataset = TensorDataset(train_x, train_y)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size,
                                               shuffle=False)
    train_iter = iter(train_loader)

    # build model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = traff_net(seq_len - reduced_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 0.7)
    train_loss_set = []
    val_loss_set = []
    det = 0  # which detector to visualize
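    # nn.CrossEntropyLoss expects raw logits of shape [batch, n_classes] and integer
    # class targets of shape [batch]. bda_utils.get_class (assumed, not shown here)
    # bins the normalized label into an integer class in 0..100, hence the
    # .flatten().long() on the targets below.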
    # In[96]:

    # train
    net.train()
    epochs = 201
    for e in range(epochs):
        for i in range(len(train_loader)):
            try:
                data, label = next(train_iter)
            except StopIteration:
                train_iter = iter(train_loader)
                data, label = next(train_iter)

            out = net(data)
            # label.shape = [batch, 1, num_dets]
            loss = criterion(
                out,
                bda_utils.get_class(label[:, 0, 0]).flatten().long())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        val_out = net(Xt_valid_3d)
        val_loss = criterion(
            val_out,
            bda_utils.get_class(Yt_valid_3d[:, 0, 0]).flatten().long())
        val_loss_set.append(val_loss.cpu().detach().numpy())
        train_loss_set.append(loss.cpu().detach().numpy())

    return loss.cpu().detach().numpy(), val_loss.cpu().detach().numpy()
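
# Hypothetical driver for the classification variant above:
#   train_loss, val_loss = main(seq_len=48, reduced_dim=15)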

def main(rs, det):
    bda_utils.setup_seed(10)  # note: fixed seed; the rs argument is not used here

    # # 1. BDA Part

    # ## 1.a. Define BDA methodology

    # In[3]:

    def kernel(ker, X1, X2, gamma):
        K = None
        if not ker or ker == 'primal':
            K = X1
        elif ker == 'linear':
            if X2 is not None:
                K = sklearn.metrics.pairwise.linear_kernel(
                    np.asarray(X1).T, np.asarray(X2).T)
            else:
                K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
        elif ker == 'rbf':
            if X2 is not None:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, np.asarray(X2).T, gamma)
            else:
                K = sklearn.metrics.pairwise.rbf_kernel(
                    np.asarray(X1).T, None, gamma)
        return K

    def proxy_a_distance(source_X, target_X):
        """
        Compute the Proxy-A-Distance of a source/target representation
        """
        nb_source = np.shape(source_X)[0]
        nb_target = np.shape(target_X)[0]

        train_X = np.vstack((source_X, target_X))
        train_Y = np.hstack(
            (np.zeros(nb_source, dtype=int), np.ones(nb_target, dtype=int)))

        clf = svm.LinearSVC(random_state=0)
        clf.fit(train_X, train_Y)
        y_pred = clf.predict(train_X)
        error = metrics.mean_absolute_error(train_Y, y_pred)
        dist = 2 * (1 - 2 * error)
        return dist

    def estimate_mu(_X1, _Y1, _X2, _Y2):
        adist_m = proxy_a_distance(_X1, _X2)
        C = len(np.unique(_Y1))
        epsilon = 1e-3
        list_adist_c = []
        for i in range(1, C + 1):
            ind_i, ind_j = np.where(_Y1 == i), np.where(_Y2 == i)
            Xsi = _X1[ind_i[0], :]
            Xtj = _X2[ind_j[0], :]
            adist_i = proxy_a_distance(Xsi, Xtj)
            list_adist_c.append(adist_i)
        adist_c = sum(list_adist_c) / C
        mu = adist_c / (adist_c + adist_m)
        if mu > 1:
            mu = 1
        if mu < epsilon:
            mu = 0
        return mu
    # In[4]:

    class BDA:
        def __init__(self,
                     kernel_type='primal',
                     dim=30,
                     lamb=1,
                     mu=0.5,
                     gamma=1,
                     T=10,
                     mode='BDA',
                     estimate_mu=False):
            '''
            Init func
            :param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
            :param dim: dimension after transfer
            :param lamb: lambda value in equation
            :param mu: mu; if not specified, it is estimated using the A-distance
            :param gamma: kernel bandwidth for rbf kernel
            :param T: iteration number
            :param mode: 'BDA' | 'WBDA'
            :param estimate_mu: True | False, whether to estimate mu automatically
                instead of setting it manually
            '''
            self.kernel_type = kernel_type
            self.dim = dim
            self.lamb = lamb
            self.mu = mu
            self.gamma = gamma
            self.T = T
            self.mode = mode
            self.estimate_mu = estimate_mu

        def fit(self, Xs, Ys, Xt, Yt):
            '''
            Transform the data and predict pseudo-labels (here via a linear SVC;
            the JDA paper used 1NN)
            :param Xs: ns * n_feature, source feature
            :param Ys: ns * 1, source label
            :param Xt: nt * n_feature, target feature
            :param Yt: nt * 1, target label
            :return: Xs_new, Xt_new, A
            '''
            # ipdb.set_trace()
            list_acc = []
            X = np.hstack((Xs.T, Xt.T))  # X.shape: [n_feature, ns+nt]
            # axis=0 takes the L2 norm of each column (sample), not a mean.
            X_mean = np.linalg.norm(X, axis=0)
            X_mean[X_mean == 0] = 1
            X /= X_mean
            m, n = X.shape
            ns, nt = len(Xs), len(Xt)
            e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones(
                (nt, 1))))
            C = np.unique(Ys)
            H = np.eye(n) - 1 / n * np.ones((n, n))
            mu = self.mu
            M = 0
            Y_tar_pseudo = None
            Xs_new = None
            for t in range(self.T):
                # print('\tStarting iter %i' % t)
                N = 0
                M0 = e * e.T * len(C)
                # ipdb.set_trace()
                if Y_tar_pseudo is not None:
                    for i in range(len(C)):
                        e = np.zeros((n, 1))
                        Ns = len(Ys[np.where(Ys == C[i])])
                        Nt = len(Y_tar_pseudo[np.where(Y_tar_pseudo == C[i])])
                        if self.mode == 'WBDA':
                            Ps = Ns / len(Ys)
                            Pt = Nt / len(Y_tar_pseudo)
                            alpha = Pt / Ps
                            # mu = 1
                        else:
                            alpha = 1
                        tt = Ys == C[i]
                        e[np.where(tt == True)] = 1 / Ns
                        # ipdb.set_trace()
                        yy = Y_tar_pseudo == C[i]
                        ind = np.where(yy == True)
                        inds = [item + ns for item in ind]
                        try:
                            e[tuple(inds)] = -alpha / Nt
                            e[np.isinf(e)] = 0
                        except Exception:
                            e[tuple(inds)] = 0  # ?
                        N = N + np.dot(e, e.T)
                # ipdb.set_trace()
                # In BDA, mu can be set or automatically estimated using A-distance.
                # In WBDA, we find that setting mu=1 is enough.
                if self.estimate_mu and self.mode == 'BDA':
                    if Xs_new is not None:
                        mu = estimate_mu(Xs_new, Ys, Xt_new, Y_tar_pseudo)
                    else:
                        mu = 0
                # ipdb.set_trace()
                M = (1 - mu) * M0 + mu * N
                M /= np.linalg.norm(M, 'fro')
                # ipdb.set_trace()
                K = kernel(self.kernel_type, X, None, gamma=self.gamma)
                n_eye = m if self.kernel_type == 'primal' else n
                a, b = np.linalg.multi_dot([
                    K, M, K.T
                ]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot(
                    [K, H, K.T])
                w, V = scipy.linalg.eig(a, b)
                ind = np.argsort(w)
                A = V[:, ind[:self.dim]]
                Z = np.dot(A.T, K)
                Z_mean = np.linalg.norm(Z, axis=0)  # per-sample L2 norm
                Z_mean[Z_mean == 0] = 1
                Z /= Z_mean
                Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T

                global device  # unused; leftover from a module-level device setup
                model = sklearn.svm.SVC(kernel='linear').fit(
                    Xs_new, Ys.ravel())
                Y_tar_pseudo = model.predict(Xt_new)
                # ipdb.set_trace()
                # MSE between pseudo-labels and true classes (Yt is already in classes)
                acc = sklearn.metrics.mean_squared_error(Y_tar_pseudo, Yt)
                # print(acc)
            return Xs_new, Xt_new, A  # , acc, Y_tar_pseudo, list_acc

    # ## 1.b. Load Data

    # In[5]:

    Xs, Xt = bda_utils.load_data(if_weekday=1, if_interdet=1)
    Xs = Xs[:, det:det + 1]
    Xt = Xt[:, det:det + 1]

    Xs, Xs_min, Xs_max = bda_utils.normalize2D(Xs)
    Xt, Xt_min, Xt_max = bda_utils.normalize2D(Xt)

    # In[6]:

    # for i in range(Xs.shape[1]):
    #     plt.figure(figsize=[20, 4])
    #     plt.plot(Xs[:, i])
    #     plt.plot(Xt[:, i])

    # ## 1.d. Hyperparameters

    # In[7]:

    label_seq_len = 7
    # batch_size = full batch
    seq_len = 12
    reduced_dim = 4
    inp_dim = min(Xs.shape[1], Xt.shape[1])
    label_dim = min(Xs.shape[1], Xt.shape[1])
    hid_dim = 12
    layers = 1
    lamb = 2
    MU = 0.7
    bda_dim = label_seq_len - 4
    kernel_type = 'linear'

    hyper = {
        'inp_dim': inp_dim,
        'label_dim': label_dim,
        'label_seq_len': label_seq_len,
        'seq_len': seq_len,
        'reduced_dim': reduced_dim,
        'hid_dim': hid_dim,
        'layers': layers,
        'lamb': lamb,
        'MU': MU,
        'bda_dim': bda_dim,
        'kernel_type': kernel_type
    }
    hyper = pd.DataFrame(hyper, index=['Values'])

    # In[8]:

    hyper

    # ## 1.e. Apply BDA and get $Xs_{new}$, $Xt_{new}$

    # In[9]:

    Xs = Xs[:96, :]  # keep the first 96 source samples (one day, per the 96-per-day split below)

    # In[10]:

    # [sample size, seq_len, inp_dim (dets)], [sample size, label_seq_len, inp_dim (dets)]
    Xs_3d, Ys_3d = bda_utils.sliding_window(Xs, Xs, seq_len, label_seq_len)
    Xt_3d, Yt_3d = bda_utils.sliding_window(Xt, Xt, seq_len, label_seq_len)
    Ys_3d = Ys_3d[:, label_seq_len - 1:, :]
    Yt_3d = Yt_3d[:, label_seq_len - 1:, :]

    # print(Xs_3d.shape)
    # print(Ys_3d.shape)
    # print(Xt_3d.shape)
    # print(Yt_3d.shape)

    # In[11]:

    t_s = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    Xs_train_3d = []
    Ys_train_3d = []
    Xt_valid_3d = []
    Xt_train_3d = []
    Yt_valid_3d = []
    Yt_train_3d = []
    for i in range(Xs_3d.shape[2]):
        # print('Starting det %i' % i)
        bda = BDA(kernel_type='linear',
                  dim=seq_len - reduced_dim,
                  lamb=lamb,
                  mu=MU,
                  gamma=1,
                  T=2)  # T is the iteration count
        # input shape: ns, n_feature | ns, n_label_feature
        Xs_new, Xt_new, A = bda.fit(
            Xs_3d[:, :, i], bda_utils.get_class(Ys_3d[:, :, i]),
            Xt_3d[:, :, i], bda_utils.get_class(Yt_3d[:, :, i]))

        day_train_t = 1
        Xs_train = Xs_new.copy()
        Ys_train = Ys_3d[:, :, i]
        Xt_valid = Xt_new.copy()[int(96 * day_train_t):, :]
        Xt_train = Xt_new.copy()[:int(96 * day_train_t), :]
        Yt_valid = Yt_3d[:, :, i].copy()[int(96 * day_train_t):, :]
        Yt_train = Yt_3d[:, :, i].copy()[:int(96 * day_train_t), :]

    # print('Time spent:%.5f' % (time.time() - t_s))

    # In[12]:

    # print(Xs_train.shape)
    # print(Ys_train.shape)
    # print(Xt_valid.shape)
    # print(Xt_train.shape)
    # print(Yt_valid.shape)
    # print(Yt_train.shape)

    # In[13]:

    train_x = np.vstack([Xs_train, Xt_train])
    train_y = np.vstack([Ys_train, Yt_train])

    # # 2. Regression Part

    # In[14]:

    import xgboost as xgb

    # In[15]:

    regr = xgb.XGBRegressor(base_score=0.5,
                            booster='gbtree',
                            colsample_bylevel=1,
                            colsample_bynode=1,
                            colsample_bytree=1,
                            gamma=0,
                            importance_type='gain',
                            learning_rate=0.1,
                            max_delta_step=0,
                            max_depth=3,
                            min_child_weight=1,
                            missing=1,
                            n_estimators=100,
                            n_jobs=1,
                            nthread=None,
                            objective='reg:squarederror',
                            random_state=1,
                            reg_alpha=0,
                            reg_lambda=1,
                            scale_pos_weight=1,
                            seed=None,
                            silent=None,
                            subsample=1,
                            verbosity=1,
                            tree_method="hist")
    regr.fit(train_x, train_y.flatten())
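    # xgboost's sklearn wrapper: fit() takes X of shape [n_samples, n_features] with a
    # 1-D y, and predict() returns a 1-D np.ndarray, so no reshaping is needed below:
    #   assert regr.predict(Xt_valid).shape == (Xt_valid.shape[0],)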
    # # 3. Evaluation

    # In[16]:

    g_t = Yt_valid.flatten()
    pred = regr.predict(Xt_valid)

    # plt.figure(figsize=[16, 4])
    # plt.plot(g_t, label='label')
    # plt.plot(pred, label='predict')
    # plt.legend()

    # In[17]:

    nrmse = bda_utils.nrmse_loss_func(pred, g_t, 0)
    mape = bda_utils.mape_loss_func(pred, g_t, 0)
    smape = bda_utils.smape_loss_func(pred, g_t, 0)
    mae = bda_utils.mae_loss_func(pred, g_t, 0)
    nmae = bda_utils.nmae_loss_func(pred, g_t, 0)
    return nrmse, mape, smape, mae, nmae