示例#1
0
文件: vsm.py 项目: yq911122/module
    def __init__(self, x, y, stop_words=None):
        '''
        Build the model from a training set.

        The training documents are handed to to_tfidf together with the
        stop words; its three return values are stored as self.x, self.cv
        and self.tfidf. The labels are stored unchanged as self.y.

        :x : pd.Series; trainset, each element as a list of words after pre-processing
        :y : pd.Series; labels
        :stop_words : list; stop words (optional, defaults to None)
        '''
        super(vsm, self).__init__()
        # self.x is the vectorized training set; self.cv and self.tfidf are
        # kept because predict() passes them back into to_tfidf
        # (presumably the fitted vectorizer objects -- confirm in to_tfidf).
        self.x, self.cv, self.tfidf = to_tfidf(x, stop_words)
        self.y = y
示例#2
0
文件: vsm.py 项目: yq911122/module
	def __init__(self, x, y, stop_words=None):
		'''
		Build the model from a training set.

		The training documents are handed to to_tfidf together with the
		stop words; its three return values are stored as self.x, self.cv
		and self.tfidf. The labels are stored unchanged as self.y.

		:x : pd.Series; trainset, each element as a list of words after pre-processing
		:y : pd.Series; labels
		:stop_words : list; stop words (optional, defaults to None)
		'''
		super(vsm, self).__init__()
		# self.x is the vectorized training set; self.cv and self.tfidf are
		# kept because predict() passes them back into to_tfidf
		# (presumably the fitted vectorizer objects -- confirm in to_tfidf).
		self.x, self.cv, self.tfidf = to_tfidf(x, stop_words)
		self.y = y
示例#3
0
文件: vsm.py 项目: yq911122/module
    def predict(self, x):
        '''
        Predict a label for every document in x.

        Each test document is scored against every training document with
        the dot product of their vectors, and receives the label of the
        training document with the highest score.

        :x : pd.Series; testset, each element as a list of words after pre-processing

        :return : pd.Series; predicted labels, one entry per row of the score table
        '''
        # Vectorize the test set with the objects stored at construction
        # time so it lands in the same feature space as self.x.
        tfidf_matrix = to_tfidf(x, self.cv, self.tfidf)
        # Score table: one row per test document, one column per training
        # document (assumes to_tfidf yields one row per document -- TODO confirm).
        # NOTE(review): if the product is a scipy sparse matrix it may need
        # densifying before pd.DataFrame -- confirm what to_tfidf returns.
        scores = pd.DataFrame(tfidf_matrix.dot(self.x.transpose()))
        # For each test row pick the column (training position) with the top
        # score. The method is idxmax (idmax does not exist), and the
        # reduction must run along axis=1 so the result has one entry per
        # test document rather than one per training document.
        best_train_pos = scores.idxmax(axis=1)
        # Labels are looked up by position, independent of self.y's own index.
        label_by_pos = dict(enumerate(self.y))
        return best_train_pos.map(label_by_pos)
示例#4
0
文件: vsm.py 项目: yq911122/module
	def predict(self, x):
		'''
		Predict a label for every document in x.

		Each test document is scored against every training document with
		the dot product of their vectors, and receives the label of the
		training document with the highest score.

		:x : pd.Series; testset, each element as a list of words after pre-processing

		:return : pd.Series; predicted labels, one entry per row of the score table
		'''
		# Vectorize the test set with the objects stored at construction
		# time so it lands in the same feature space as self.x.
		tfidf_matrix = to_tfidf(x, self.cv, self.tfidf)
		# Score table: one row per test document, one column per training
		# document (assumes to_tfidf yields one row per document -- TODO confirm).
		# NOTE(review): if the product is a scipy sparse matrix it may need
		# densifying before pd.DataFrame -- confirm what to_tfidf returns.
		scores = pd.DataFrame(tfidf_matrix.dot(self.x.transpose()))
		# For each test row pick the column (training position) with the top
		# score. The method is idxmax (idmax does not exist), and the
		# reduction must run along axis=1 so the result has one entry per
		# test document rather than one per training document.
		best_train_pos = scores.idxmax(axis=1)
		# Labels are looked up by position, independent of self.y's own index.
		label_by_pos = dict(enumerate(self.y))
		return best_train_pos.map(label_by_pos)