Пример #1
0
	def fit_to_feat(self, word_v, wchoice):
		"""Fit this ad to a feature vocabulary and return its weight vector.

		word_v  -- iterable of feature words; one weight is emitted per word,
		           in the same order.
		wchoice -- weighting mode: NUM gives the raw occurrence count as a
		           float, LOG_NUM gives the natural log of that count.

		Returns a list of floats. If wchoice is neither NUM nor LOG_NUM no
		weights are appended and the returned list is empty.
		"""
		vec = []
		words = self.ad_to_words()
		# NOTE(review): the stemmed result is never used below; the counts are
		# taken on the stripped, un-stemmed words. The call is kept in case the
		# helper has side effects -- confirm whether stemmed_words was meant to
		# be counted instead.
		stemmed_words = common.stem_low_wvec(words)
		words = common.strip_vec(words)
		for word in word_v:
			if wchoice == NUM:
				vec.append(float(words.count(word)))
			elif wchoice == LOG_NUM:
				count = words.count(word)
				# math.log(0.0) raises ValueError, and most vocabulary words
				# are absent from any single ad. Absent words get weight 0.0
				# (the same weight as a single occurrence, since log(1) == 0);
				# positive counts keep their original log(count) value.
				vec.append(math.log(float(count)) if count > 0 else 0.0)
		return vec
Пример #2
0
 def gen_word_vec(self, word_v, wchoice=W_CHOICE):
     """Generate this AdVector's weight vector fitted to the vocabulary word_v.

     word_v  -- iterable of feature words; one weight is emitted per word,
                in the same order.
     wchoice -- weighting mode (defaults to W_CHOICE): NUM gives the raw
                occurrence count as a float, LOG_NUM gives the natural log
                of that count.

     Returns a list of floats. If wchoice is neither NUM nor LOG_NUM no
     weights are appended and the returned list is empty.
     """
     vec = []
     words = self.advec_to_words()
     # NOTE(review): the stemmed result is never used below; the counts are
     # taken on the stripped, un-stemmed words. The call is kept in case the
     # helper has side effects -- confirm whether stemmed_words was meant to
     # be counted instead.
     stemmed_words = common.stem_low_wvec(words)
     words = common.strip_vec(words)
     for word in word_v:
         if wchoice == NUM:
             vec.append(float(words.count(word)))
         elif wchoice == LOG_NUM:
             count = words.count(word)
             # math.log(0.0) raises ValueError, and most vocabulary words
             # are absent from any single vector. Absent words get weight 0.0
             # (the same weight as a single occurrence, since log(1) == 0);
             # positive counts keep their original log(count) value.
             vec.append(math.log(float(count)) if count > 0 else 0.0)
     return vec
Пример #3
0
	def fit_to_feat(self, word_v, wchoice):
		"""Fit this news item to a feature vocabulary and return its weight vector.

		word_v  -- iterable of feature words; one weight is emitted per word,
		           in the same order.
		wchoice -- weighting mode: NUM gives the raw occurrence count as a
		           float, LOG_NUM gives the natural log of that count.

		Returns a list of floats. If wchoice is neither NUM nor LOG_NUM no
		weights are appended and the returned list is empty.
		"""
		vec = []
		words = self.news_to_words()
		# NOTE(review): the stemmed result is never used below; the counts are
		# taken on the stripped, un-stemmed words. The call is kept in case the
		# helper has side effects -- confirm whether stemmed_words was meant to
		# be counted instead.
		stemmed_words = common.stem_low_wvec(words)
		words = common.strip_vec(words)
		for word in word_v:
			if wchoice == NUM:
				vec.append(float(words.count(word)))
			elif wchoice == LOG_NUM:
				count = words.count(word)
				# math.log(0.0) raises ValueError, and most vocabulary words
				# are absent from any single news item. Absent words get weight
				# 0.0 (the same weight as a single occurrence, since
				# log(1) == 0); positive counts keep their log(count) value.
				vec.append(math.log(float(count)) if count > 0 else 0.0)
		return vec
Пример #4
0
	def gen_word_vec(self, word_v, wchoice=W_CHOICE):
		"""Generate this NewsVector's weight vector fitted to the vocabulary word_v.

		word_v  -- iterable of feature words; one weight is emitted per word,
		           in the same order.
		wchoice -- weighting mode (defaults to W_CHOICE): NUM gives the raw
		           occurrence count as a float, LOG_NUM gives the natural log
		           of that count.

		Returns a list of floats. If wchoice is neither NUM nor LOG_NUM no
		weights are appended and the returned list is empty.
		"""
		vec = []
		words = self.newsvec_to_words()
		# NOTE(review): the stemmed result is never used below; the counts are
		# taken on the stripped, un-stemmed words. The call is kept in case the
		# helper has side effects -- confirm whether stemmed_words was meant to
		# be counted instead.
		stemmed_words = common.stem_low_wvec(words)
		words = common.strip_vec(words)
		for word in word_v:
			if wchoice == NUM:
				vec.append(float(words.count(word)))
			elif wchoice == LOG_NUM:
				count = words.count(word)
				# math.log(0.0) raises ValueError, and most vocabulary words
				# are absent from any single vector. Absent words get weight 0.0
				# (the same weight as a single occurrence, since log(1) == 0);
				# positive counts keep their original log(count) value.
				vec.append(math.log(float(count)) if count > 0 else 0.0)
		return vec
Пример #5
0
def word_vectors(list):
    """Build per-ad word vectors over a vocabulary shared by all given ads.

    All ads are merged with ``union`` into a single ``AdVector``. Its words
    are passed through ``common.stem_low_wvec``, entries found in
    ``stopwords.words('english')`` are dropped, and the remainder is reduced
    with ``common.unique_words`` and ``common.strip_vec`` to form the feature
    vocabulary. Each ad is then fitted to that vocabulary with
    ``gen_word_vec``.

    list -- re-iterable collection of ad vectors (iterated twice). The name
            shadows the builtin but is kept so existing keyword callers
            continue to work.

    Returns a tuple ``(wv_list, labels, word_v)``: the per-ad word vectors,
    the matching ``label`` of each ad in the same order, and the vocabulary
    used as features.
    """
    ad_union = adVector.AdVector()
    for ads in list:
        ad_union = ad_union.union(ads)
    words = ad_union.advec_to_words()
    stemmed_words = common.stem_low_wvec(words)
    # Fetch the stop-word list once instead of once per stemmed word, and use
    # a set so each membership test does not scan the whole list.
    english_stopwords = set(stopwords.words('english'))
    filtered_words = [w for w in stemmed_words if w not in english_stopwords]
    word_v = common.unique_words(filtered_words)
    word_v = common.strip_vec(word_v)
    wv_list = []
    labels = []
    for ads in list:
        wv_list.append(ads.gen_word_vec(word_v))
        labels.append(ads.label)
    return wv_list, labels, word_v  # word_v is returned as the feature list
Пример #6
0
def word_vectors(list):
    """Build per-ad word vectors over a vocabulary shared by all given ads.

    All ads are merged with ``union`` into a single ``AdVector``. Its words
    are passed through ``common.stem_low_wvec``, entries found in
    ``stopwords.words('english')`` are dropped, and the remainder is reduced
    with ``common.unique_words`` and ``common.strip_vec`` to form the feature
    vocabulary. Each ad is then fitted to that vocabulary with
    ``gen_word_vec``.

    list -- re-iterable collection of ad vectors (iterated twice). The name
            shadows the builtin but is kept so existing keyword callers
            continue to work.

    Returns a tuple ``(wv_list, labels, word_v)``: the per-ad word vectors,
    the matching ``label`` of each ad in the same order, and the vocabulary
    used as features.
    """
    ad_union = adVector.AdVector()
    for ads in list:
        ad_union = ad_union.union(ads)
    words = ad_union.advec_to_words()
    stemmed_words = common.stem_low_wvec(words)
    # Fetch the stop-word list once instead of once per stemmed word, and use
    # a set so each membership test does not scan the whole list.
    english_stopwords = set(stopwords.words('english'))
    filtered_words = [
        w for w in stemmed_words if w not in english_stopwords
    ]
    word_v = common.unique_words(filtered_words)
    word_v = common.strip_vec(word_v)
    wv_list = []
    labels = []
    for ads in list:
        wv_list.append(ads.gen_word_vec(word_v))
        labels.append(ads.label)
    return wv_list, labels, word_v  # word_v is returned as the feature list