import numpy as np
from sklearn.cross_decomposition import CCA


class cca:
    def __init__(self, n_components=1, ccatype=None):
        self.n_components = n_components
        self.ccatype = ccatype

    def derive_transform(self, A, B):
        self.model = CCA(n_components=self.n_components, scale=False).fit(A, B)
        if self.ccatype == 'new':
            # http://onlinelibrary.wiley.com/doi/10.1002/cem.2637/abstract
            F1 = np.linalg.pinv(self.model.x_scores_).dot(self.model.y_scores_)
            F2 = np.linalg.pinv(self.model.y_scores_).dot(B)
            # The original called `ct.multi_dot`; `ct` is undefined here, so
            # np.linalg.multi_dot (same signature) is assumed.
            P = np.linalg.multi_dot((self.model.x_weights_, F1, F2))
            self.proj_to_B = P
        else:
            return self.model

    def apply_transform(self, C):
        C = np.array(C)
        if self.ccatype == 'new':
            # Project C directly into B's space via the learned matrix.
            return C.dot(self.proj_to_B)
        else:
            if len(C.shape) == 1:
                C = C.reshape(1, -1)
            return self.model.predict(C)
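# Minimal usage sketch for the wrapper above (hypothetical random matrices
# standing in for real paired datasets A and B).
def _demo_cca_wrapper():
    rng = np.random.RandomState(0)
    A = rng.randn(50, 8)
    B = rng.randn(50, 4)
    t = cca(n_components=2, ccatype='new')
    t.derive_transform(A, B)            # learns proj_to_B
    C = rng.randn(5, 8)
    print(t.apply_transform(C).shape)   # (5, 4): C projected into B's space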
from sklearn.cross_decomposition import CCA

# `Op` was left undefined in the original; it is assumed to alias sklearn's CCA.
Op = CCA


class _CCAImpl:
    """Thin fit/transform/predict wrapper around the wrapped estimator."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)

    def predict(self, X):
        return self._wrapped_model.predict(X)
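# Usage sketch (hypothetical data; assumes Op is sklearn's CCA as aliased above).
def _demo_cca_impl():
    import numpy as np
    X = np.random.randn(20, 4)
    Y = np.random.randn(20, 2)
    impl = _CCAImpl(n_components=2).fit(X, Y)
    print(impl.transform(X).shape)  # (20, 2): canonical scores for X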
from sklearn.cross_decomposition import CCA


class BiomeCCA:
    def __init__(self, args):
        self.CCA = CCA(n_components=args.latent_size)

    def fit(self, X1_train, X2_train, y_train, X1_val, X2_val, y_val, args):
        # CCA cannot extract more components than the smaller view has features.
        max_components = min(X1_train.shape[1], X2_train.shape[1])
        if args.latent_size > max_components:
            print("Warning: automatically reducing latent size to", max_components)
            self.CCA = CCA(n_components=max_components)
        return self.CCA.fit(X1_train, X2_train)

    def transform(self, x1_train, x2_train, y, args):
        return self.CCA.transform(x1_train, x2_train)

    def predict(self, X1_val, X2_val, y_val, args):
        return self.CCA.predict(X1_val)

    def get_transformation(self):
        return self.CCA.coef_

    def param_l0(self):
        return {"Encoder": self.CCA.coef_.shape[0] * self.CCA.n_components,
                "Decoder": self.CCA.n_components * self.CCA.coef_.shape[1]}

    def get_graph(self):
        return ([], [])
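# Usage sketch for BiomeCCA (hypothetical: a SimpleNamespace stands in for the
# repo's argparse args, and the two views are random matrices).
def _demo_biome_cca():
    import numpy as np
    from types import SimpleNamespace
    args = SimpleNamespace(latent_size=3)
    X1, X2 = np.random.randn(40, 10), np.random.randn(40, 5)
    model = BiomeCCA(args)
    model.fit(X1, X2, None, None, None, None, args)
    Z1, Z2 = model.transform(X1, X2, None, args)  # paired 3-dim latent views
    print(Z1.shape, Z2.shape)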
import numpy as np
from sklearn.cross_decomposition import CCA


def cca(self, X, y):
    cca_model = CCA(n_components=self.n_comp, scale=False)
    cca_model.fit(X, y)
    X_c, y_c = cca_model.transform(X, y)
    y_predict = cca_model.predict(X, copy=True)
    # R2 = cca_model.score(X, y, sample_weight=None)

    # Loadings: the correlation of each original variable with its canonical variate.
    loading_x = cca_model.x_loadings_
    loading_y = cca_model.y_loadings_

    # Weights: coefficients of the linear combinations; they can be used to
    # project the reduced variables back into the original space.
    # Note: if scale=True, the weights refer to the standardized data.
    weight_x = cca_model.x_weights_
    weight_y = cca_model.y_weights_
    # weight_orig = np.dot(y_c[0, :], weight_y.T)

    # coef_: regression coefficients of X on y; can be used to predict y
    # (np.dot, i.e. matrix multiplication).
    coef = cca_model.coef_
    # In this algorithm rotations == weights:
    # rotation_y = cca_model.y_rotations_
    # rotation_x = cca_model.x_rotations_
    # score(X, y) returns R squared.

    # Covariance explained by each canonical variate within its own set.
    cov_x = np.cov(X_c.T)
    cov_y = np.cov(y_c.T)
    # np.diag(cov_x)
    eigvals_x, _ = np.linalg.eig(cov_x)
    eigvals_y, _ = np.linalg.eig(cov_y)
    explain_x = pow(eigvals_x, 2) / np.sum(pow(eigvals_x, 2))
    explain_y = pow(eigvals_y, 2) / np.sum(pow(eigvals_y, 2))
    # np.sort(explain)

    return (cca_model,
            X_c, y_c,
            loading_x, loading_y,
            weight_x, weight_y,
            explain_x, explain_y,
            coef, y_predict)
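# Hypothetical driver for the analysis helper above: since it takes `self`
# explicitly, bind it to a small namespace that supplies n_comp, then run it
# on random paired data.
def _demo_cca_analysis():
    from types import SimpleNamespace
    holder = SimpleNamespace(n_comp=2)
    X = np.random.randn(100, 5)
    Y = np.random.randn(100, 3)
    results = cca(holder, X, Y)
    model, X_c, y_c = results[0], results[1], results[2]
    print(X_c.shape, y_c.shape)  # (100, 2) canonical scores for each block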
import numpy as np
from sklearn.cross_decomposition import CCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


def main(args):
    (training_file, label_file, test_file, test_label, u_file) = args
    # load_feat is assumed to be defined elsewhere in this module.
    X_training = load_feat(training_file)
    n = len(X_training)
    U = load_feat(u_file)
    y_training = [int(line.strip()) for line in open(label_file)]
    U = np.asarray(U)
    X_training = np.asarray(X_training)
    #X = preprocessing.normalize(X, norm='l2')
    y_training = np.asarray(y_training)
    X_test = load_feat(test_file)
    y_test = [int(line.strip()) for line in open(test_label)]
    X_test = np.asarray(X_test)
    #test_X = preprocessing.normalize(test_X, norm='l2')
    y_test = np.asarray(y_test)

    cca = CCA(n_components=100)
    (X_cca, U_cca) = cca.fit_transform(X_training, U[:n])
    # transform (not predict) keeps the test features in the same
    # 100-dimensional canonical space the SVC is trained on.
    X_test_cca = cca.transform(X_test)

    svr = SVC()
    svr.fit(X_cca, y_training)
    pred = svr.predict(X_test_cca)
    print(pred)
    print(y_test)
    print(accuracy_score(y_test, pred))
    with open(test_file + '.cca.2.pred', 'w') as output:
        for p in pred:
            print(p, file=output)
    #svm_model.fit(X, y)
    #pickle.dump(lr, open(model_file, "wb"))
    return
import numpy as np


def test_cca_implementation():
    # `CCA` here is the custom implementation under test (imported elsewhere);
    # the sklearn version is imported below under a distinct name for comparison.
    X = np.random.multivariate_normal(np.random.randint(50, 100, (10)).astype('float'), np.identity(10), 200)
    Y = np.random.multivariate_normal(np.random.randint(80, 200, (6)).astype('float'), np.identity(6), 200)
    X_test = np.random.multivariate_normal(np.random.randint(50, 100, (10)).astype('float'), np.identity(10), 20)
    Y_test = np.random.multivariate_normal(np.random.randint(50, 100, (6)).astype('float'), np.identity(6), 20)

    mdl_test = CCA(n_components=6)
    mdl_test.fit(X, Y)
    Y_pred = mdl_test.predict(X)
    print(Y_pred)
    print('-' * 50)
    # print(Y_test)

    from sklearn.cross_decomposition import CCA as CCA_sklearn
    mdl_actual = CCA_sklearn(n_components=6)
    mdl_actual.fit(X, Y)
    print('-' * 50)
    Y_actual = mdl_actual.predict(X)
    print(Y_actual)
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.cross_decomposition import CCA
from sklearn.metrics import explained_variance_score
from sklearn.model_selection import KFold, train_test_split
from tqdm import tqdm


def cca(x, neural_data, region=None, brain_region=['IT', 'V4'], cv=5,
        n_components=5, variation=[0, 3, 6], sortby='image_id', train_size=0.75):
    # var_lookup = stimulus_set[stimulus_set.variation.isin(variation)].image_id.values
    # x = x.where(x.image_id.isin(var_lookup), drop=True)
    # nd = neural_data.where(neural_data.image_id.isin(var_lookup), drop=True)
    x = x.sortby(sortby)
    nd = neural_data.sortby(sortby)
    assert list(getattr(x, sortby).values) == list(getattr(nd, sortby).values)
    num_images = x.shape[0]
    out_recs = []

    # Collect k-fold splits plus `cv` extra random train/test splits.
    # (Note: the zip with np.arange(cv) below consumes only the first cv splits.)
    cv_tr = []
    cv_te = []
    kf = KFold(n_splits=cv, shuffle=True, random_state=cv)
    for tr, te in kf.split(np.arange(num_images)):
        cv_tr.append(tr)
        cv_te.append(te)
    for rand_delta in np.arange(cv):
        tr_idx, te_idx, _, _ = train_test_split(
            np.arange(num_images), np.arange(num_images),
            train_size=train_size,
            random_state=np.random.randint(0, 50) + rand_delta)
        cv_tr.append(tr_idx)
        cv_te.append(te_idx)

    for br in brain_region:
        nd_reg = nd.sel(region=br)
        if region is None:
            region = np.unique(x.region.values)
        for reg in region:
            if reg == 'pixel':
                continue
            x_reg = x.sel(region=reg)
            depth = np.unique(x_reg.layer.values)[0]
            with tqdm(zip(np.arange(cv), cv_tr, cv_te), total=cv) as t:
                t.set_description('{}{} x {}{}'.format(reg, x_reg.shape, br, nd_reg.shape))
                r_mean = []
                fve_mean = []
                cca_mean = []
                for n, tr, te in t:
                    cca = CCA(n_components=n_components)
                    cca.fit(x_reg.values[tr], nd_reg.values[tr])
                    u, v = cca.transform(x_reg.values[te], nd_reg.values[te])
                    y_pred = cca.predict(x_reg.values[te])
                    y_true = nd_reg.values[te]
                    fve = explained_variance_score(y_true, y_pred, multioutput='raw_values')
                    r_vals = [pearsonr(y_pred[:, i], y_true[:, i])
                              for i in range(y_pred.shape[-1])]
                    # pearsonr returns an (r, p) pair; average only the r values.
                    cca_r = np.mean([pearsonr(u[:, i], v[:, i])[0]
                                     for i in np.arange(n_components)])
                    # r_vals = [pearsonr(ab_vec[0][:,i], ab_vec[1][:,i]) for i in range(ab_vec[0].shape[-1])]
                    r_mean.append(np.mean([r for r, p in r_vals]))
                    cca_mean.append(cca_r)
                    fve_mean.append(np.mean(fve))
                    for rv, f, nid in zip(r_vals, fve, nd_reg[te].neuroid_id.values):
                        out_recs.append({
                            'region': br,
                            'layer': reg,
                            'pearsonr': rv[0],
                            'cca_r': cca_r,
                            'fve': f,
                            'iter': n,
                            'depth': depth,
                            'neuroid_id': nid,
                        })
                    t.set_postfix(pearson=np.mean(r_mean),
                                  cca=np.mean(cca_mean),
                                  fve=np.mean(fve_mean))
    return pd.DataFrame.from_records(out_recs)
from sklearn.cross_decomposition import CCA


def cca_ds(A, B, n_components=1):
    # Fit with B as X and A as Y, so predict(B) estimates A.
    model = CCA(n_components=n_components, scale=False).fit(B, A)
    # sklearn exposes the regression coefficients as `coef_` (not `coefs`).
    return model.coef_, model.predict(B)
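# Quick check of cca_ds on hypothetical data: A is built as a noisy linear
# function of B, so the CCA regression should recover it closely.
def _demo_cca_ds():
    import numpy as np
    rng = np.random.RandomState(1)
    B = rng.randn(30, 3)
    A = B @ rng.randn(3, 2) + 0.01 * rng.randn(30, 2)
    coef, A_hat = cca_ds(A, B, n_components=2)
    print(coef.shape, np.abs(A - A_hat).max())  # small reconstruction error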
import os
import time
import warnings

import joblib
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cross_decomposition import CCA
from sklearn.metrics import r2_score


class CcaClass:
    """
    Name : CCA
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'cca'
        # Base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
        # Suppress warning messages
        warnings.filterwarnings('ignore')
        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                           sep=",", encoding="utf-8")
        # Masks for splitting train and test data
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # Training split
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test split
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # Declare the model
        self._model = CCA()
        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Plain prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)
        # Score
        score = r2_score(self._y_test, y_pred)
        # Report
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
            print(f'Score = {score}')
        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # Predictions & score
        return [list(y_pred.tolist()), score]

    # CV prediction (cross validation)
    def predict_by_cv(self):
        # Implement cross validation as needed for the actual project.
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Archive the existing model, then save the new one
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # Ground truth
        plt.plot(self._y_test, c='r')
        # Predictions
        plt.plot(data, c='b')
        # Save as image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # Show chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
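# Hypothetical entry point showing how CcaClass might be driven; it assumes
# the sample CSV and model directory described above exist.
if __name__ == '__main__':
    cca_rg = CcaClass()          # fitting happens in __init__
    predicted, score = cca_rg.predict(save_img=False, show_chart=False)
    cca_rg.save_model(renew=False)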