Exemplo n.º 1
0
	def __init__(self,corpus,n_components=2,kernel=None):
		"""Fit a PCA (or kernel PCA) projection of the corpus feature columns.

		Args:
			corpus: Passed through unchanged to ``StyloClassifier.__init__``.
			n_components: Number of components to keep in the projection.
			kernel: Kernel name handed to ``KernelPCA``. Any falsy value
				(including the default ``None``) selects plain ``PCA`` instead.

		The fitted transformer is stored on ``self.pca`` and the projected
		data on ``self.pca_data``.
		"""
		StyloClassifier.__init__(self,corpus)
		# NOTE(review): data_frame and cols are presumably populated by the
		# base initializer above -- confirm against StyloClassifier.
		data = self.data_frame[self.cols].values
		self.n_components = n_components
		self.kernel = kernel
		if not kernel:
			self.pca = PCA(n_components=self.n_components)
		else:
			# Forward n_components here too; without it KernelPCA keeps every
			# non-zero component and silently ignores the requested size.
			self.pca = KernelPCA(n_components=self.n_components, kernel=kernel, gamma=10)
		# Standardize the features before projecting them.
		self.pca_data = self.pca.fit_transform(StandardScaler().fit_transform(data))
Exemplo n.º 2
0
	def __init__(self,corpus,n_components=2,kernel=None):
		"""Fit a PCA (or kernel PCA) projection of the corpus feature columns.

		Args:
			corpus: Passed through unchanged to ``StyloClassifier.__init__``.
			n_components: Number of components to keep in the projection.
			kernel: Kernel name handed to ``KernelPCA``. Any falsy value
				(including the default ``None``) selects plain ``PCA`` instead.

		The fitted transformer is stored on ``self.pca`` and the projected
		data on ``self.pca_data``.
		"""
		StyloClassifier.__init__(self,corpus)
		# NOTE(review): data_frame and cols are presumably populated by the
		# base initializer above -- confirm against StyloClassifier.
		data = self.data_frame[self.cols].values
		self.n_components = n_components
		self.kernel = kernel
		if not kernel:
			self.pca = PCA(n_components=self.n_components)
		else:
			# Forward n_components here too; without it KernelPCA keeps every
			# non-zero component and silently ignores the requested size.
			self.pca = KernelPCA(n_components=self.n_components, kernel=kernel, gamma=10)
		# Standardize the features before projecting them.
		self.pca_data = self.pca.fit_transform(StandardScaler().fit_transform(data))
Exemplo n.º 3
0
	def __init__(self,corpus,num_train=-1,num_val=-1,n_components=2,kernel=None,random_state=None,
		n_clusters=-1,max_iter=300,n_init=10,init='k-means++',precompute_distances=True,tol=1e-4,n_jobs=1):
		"""Set up a PCA projection, a KMeans estimator and a train/test split.

		Args:
			corpus: Passed to ``StyloClassifier.__init__`` and to ``StyloPCA``.
			num_train, num_val: Forwarded to ``StyloClassifier.__init__``.
			n_components, kernel: Stored on the instance and forwarded to
				``StyloPCA``.
			random_state: Seed for ``train_test_split``; 42 is used when this
				is ``None``. It is not forwarded to ``KMeans``.
			n_clusters: Number of clusters; a negative value means "one per
				distinct value in the ``"Author"`` column".
			max_iter, n_init, init, precompute_distances, tol, n_jobs:
				Forwarded to ``KMeans``.

		NOTE(review): recent scikit-learn releases no longer accept
		``precompute_distances`` / ``n_jobs`` on ``KMeans`` -- confirm the
		scikit-learn version this project pins.
		"""
		# Set before the base initializer runs, as in the original ordering.
		self.kernel = kernel
		self.n_components = n_components
		StyloClassifier.__init__(self,corpus,num_train=num_train,num_val=num_val)
		if n_clusters < 0:
			n_clusters = len(set(self.data_frame["Author"]))
		self.stylo_pca = StyloPCA(corpus,n_components=n_components,kernel=kernel)
		# max_iter was accepted but never forwarded, so it had no effect.
		self.k_means = KMeans(n_clusters=n_clusters,max_iter=max_iter,n_init=n_init,init=init,
			precompute_distances=precompute_distances,tol=tol,n_jobs=n_jobs)
		self.X = self.data_frame[self.cols].values
		self.y = self.data_frame[self.pred_col].values
		# Fall back to a fixed seed so the split is reproducible by default.
		rs = 42 if random_state is None else random_state
		# NOTE(review): self.num_train / self.num_val are presumably set by the
		# base initializer -- confirm against StyloClassifier.
		self.Xr, self.Xt, self.yr, self.yt = train_test_split(self.X, self.y, train_size=self.num_train, test_size=self.num_val, random_state=rs)
Exemplo n.º 4
0
	def __init__(self,corpus,num_train=-1,num_val=-1,n_components=2,kernel=None,random_state=None,
		n_clusters=-1,max_iter=300,n_init=10,init='k-means++',precompute_distances=True,tol=1e-4,n_jobs=1):
		"""Set up a PCA projection, a KMeans estimator and a train/test split.

		Args:
			corpus: Passed to ``StyloClassifier.__init__`` and to ``StyloPCA``.
			num_train, num_val: Forwarded to ``StyloClassifier.__init__``.
			n_components, kernel: Stored on the instance and forwarded to
				``StyloPCA``.
			random_state: Seed for ``train_test_split``; 42 is used when this
				is ``None``. It is not forwarded to ``KMeans``.
			n_clusters: Number of clusters; a negative value means "one per
				distinct value in the ``"Author"`` column".
			max_iter, n_init, init, precompute_distances, tol, n_jobs:
				Forwarded to ``KMeans``.

		NOTE(review): recent scikit-learn releases no longer accept
		``precompute_distances`` / ``n_jobs`` on ``KMeans`` -- confirm the
		scikit-learn version this project pins.
		"""
		# Set before the base initializer runs, as in the original ordering.
		self.kernel = kernel
		self.n_components = n_components
		StyloClassifier.__init__(self,corpus,num_train=num_train,num_val=num_val)
		if n_clusters < 0:
			n_clusters = len(set(self.data_frame["Author"]))
		self.stylo_pca = StyloPCA(corpus,n_components=n_components,kernel=kernel)
		# max_iter was accepted but never forwarded, so it had no effect.
		self.k_means = KMeans(n_clusters=n_clusters,max_iter=max_iter,n_init=n_init,init=init,
			precompute_distances=precompute_distances,tol=tol,n_jobs=n_jobs)
		self.X = self.data_frame[self.cols].values
		self.y = self.data_frame[self.pred_col].values
		# Fall back to a fixed seed so the split is reproducible by default.
		rs = 42 if random_state is None else random_state
		# NOTE(review): self.num_train / self.num_val are presumably set by the
		# base initializer -- confirm against StyloClassifier.
		self.Xr, self.Xt, self.yr, self.yt = train_test_split(self.X, self.y, train_size=self.num_train, test_size=self.num_val, random_state=rs)