def __init__(self, corpus, n_components=2, kernel=None):
    """Fit a PCA (or kernel PCA) projection of the corpus feature columns.

    The feature matrix is read from ``self.data_frame[self.cols]`` after
    ``StyloClassifier.__init__`` has run (presumably that base initialiser
    is what populates both attributes -- confirm against StyloClassifier).
    The features are standardised with ``StandardScaler`` before the
    projection is fitted.

    Args:
        corpus: Passed straight through to ``StyloClassifier.__init__``.
        n_components: Number of components to keep in the projection.
        kernel: Kernel name handed to ``KernelPCA``.  Any falsy value
            (``None``, ``""``) selects plain linear ``PCA`` instead.

    Attributes set:
        n_components, kernel: The arguments as given.
        pca: The fitted ``PCA`` or ``KernelPCA`` estimator.
        pca_data: The standardised features transformed by ``pca``.
    """
    StyloClassifier.__init__(self, corpus)
    self.n_components = n_components
    self.kernel = kernel

    features = self.data_frame[self.cols].values

    if not kernel:
        self.pca = PCA(n_components=self.n_components)
    else:
        # n_components must be forwarded here as well; previously the kernel
        # branch ignored it and fell back to KernelPCA's own default.
        # gamma is fixed at 10, as in the original implementation.
        self.pca = KernelPCA(
            n_components=self.n_components,
            kernel=kernel,
            gamma=10,
        )

    standardised = StandardScaler().fit_transform(features)
    self.pca_data = self.pca.fit_transform(standardised)
def __init__(self, corpus, num_train=-1, num_val=-1, n_components=2,
             kernel=None, random_state=None, n_clusters=-1, max_iter=300,
             n_init=10, init='k-means++', precompute_distances=True,
             tol=1e-4, n_jobs=1):
    """Set up a k-means model, a PCA projection and a train/test split.

    Args:
        corpus: Passed to ``StyloClassifier.__init__`` and to ``StyloPCA``.
        num_train: Passed to ``StyloClassifier.__init__``; the split below
            uses ``self.num_train`` as ``train_size`` (presumably set by
            the base initialiser -- confirm against StyloClassifier).
        num_val: Passed to ``StyloClassifier.__init__``; the split below
            uses ``self.num_val`` as ``test_size`` (same caveat).
        n_components: Number of components for the ``StyloPCA`` projection.
        kernel: Kernel for the ``StyloPCA`` projection.
        random_state: Seed for ``train_test_split``.  When ``None`` the
            split is seeded with 42.  It is not passed to ``KMeans``.
        n_clusters: Number of clusters.  A negative value means "use the
            number of distinct values in the ``"Author"`` column".
        max_iter, n_init, init, precompute_distances, tol, n_jobs:
            Forwarded unchanged to ``KMeans``.

    Attributes set:
        kernel, n_components: The arguments as given.
        stylo_pca: A ``StyloPCA`` built from the same corpus.
        k_means: The (unfitted) ``KMeans`` estimator.
        X, y: Feature matrix (``self.cols``) and target values
            (``self.pred_col``) taken from ``self.data_frame``.
        Xr, Xt, yr, yt: Train/test partitions of ``X`` and ``y``.
    """
    # Assigned before the base initialiser, matching the original order.
    self.kernel = kernel
    self.n_components = n_components
    StyloClassifier.__init__(self, corpus, num_train=num_train, num_val=num_val)

    if n_clusters < 0:
        # Default to one cluster per distinct author in the corpus.
        n_clusters = len(set(self.data_frame["Author"]))

    self.stylo_pca = StyloPCA(corpus, n_components=n_components, kernel=kernel)

    # max_iter used to be accepted but silently dropped; forward it so the
    # caller's iteration cap actually takes effect.
    self.k_means = KMeans(
        n_clusters=n_clusters,
        max_iter=max_iter,
        n_init=n_init,
        init=init,
        precompute_distances=precompute_distances,
        tol=tol,
        n_jobs=n_jobs,
    )

    self.X = self.data_frame[self.cols].values
    self.y = self.data_frame[self.pred_col].values

    # A fixed fallback seed keeps the split reproducible by default.
    split_seed = 42 if random_state is None else random_state
    self.Xr, self.Xt, self.yr, self.yt = train_test_split(
        self.X,
        self.y,
        train_size=self.num_train,
        test_size=self.num_val,
        random_state=split_seed,
    )