def fit_to_feat(self, word_v, wchoice):
    """Fit this ad to a feature vocabulary and return its weight vector.

    The ad's words come from ``self.ad_to_words()`` and are passed through
    ``common.strip_vec`` before being counted.

    Args:
        word_v: iterable of feature words; the result has one weight per
            entry, in the same order.
        wchoice: weighting scheme. ``NUM`` yields the raw occurrence count
            as a float; ``LOG_NUM`` yields the natural log of the count.

    Returns:
        A list of floats aligned with ``word_v``. Any other ``wchoice``
        value yields an empty list, as in the original implementation.
    """
    from collections import Counter

    words = self.ad_to_words()
    # NOTE(review): the stemmed result was never used by the original code;
    # counts are taken on the stripped, unstemmed words. The call is kept in
    # case it has side effects -- confirm whether counting was meant to use
    # the stemmed words instead.
    common.stem_low_wvec(words)

    # Count every word once instead of rescanning the list per feature.
    counts = Counter(common.strip_vec(words))

    if wchoice == NUM:
        return [float(counts[word]) for word in word_v]
    if wchoice == LOG_NUM:
        # math.log(0.0) raises ValueError, which made this mode fail for any
        # feature word missing from the ad. Absent words now weigh 0.0;
        # weights for words that do occur are unchanged.
        # NOTE(review): a count of 1 also maps to 0.0 (log(1)); confirm
        # whether a smoothed form such as log(1 + count) is wanted.
        return [math.log(float(counts[word])) if counts[word] else 0.0
                for word in word_v]
    return []
def gen_word_vec(self, word_v, wchoice=W_CHOICE):
    """Generate this AdVector's weight vector over the vocabulary ``word_v``.

    The words come from ``self.advec_to_words()`` and are passed through
    ``common.strip_vec`` before being counted.

    Args:
        word_v: iterable of feature words; the result has one weight per
            entry, in the same order.
        wchoice: weighting scheme, defaulting to ``W_CHOICE``. ``NUM``
            yields the raw occurrence count as a float; ``LOG_NUM`` yields
            the natural log of the count.

    Returns:
        A list of floats aligned with ``word_v``. Any other ``wchoice``
        value yields an empty list, as in the original implementation.
    """
    from collections import Counter

    words = self.advec_to_words()
    # NOTE(review): the stemmed result was never used by the original code;
    # counts are taken on the stripped, unstemmed words. The call is kept in
    # case it has side effects -- confirm whether counting was meant to use
    # the stemmed words instead.
    common.stem_low_wvec(words)

    # One pass over the words replaces a full list scan per feature word.
    occurrences = Counter(common.strip_vec(words))

    if wchoice == NUM:
        return [float(occurrences[word]) for word in word_v]
    if wchoice == LOG_NUM:
        # math.log(0.0) raises ValueError, so the original failed whenever a
        # vocabulary word did not occur in this vector. Absent words now
        # weigh 0.0; weights for words that do occur are unchanged.
        # NOTE(review): a count of 1 also maps to 0.0 (log(1)); confirm
        # whether a smoothed form such as log(1 + count) is wanted.
        return [math.log(float(occurrences[word])) if occurrences[word] else 0.0
                for word in word_v]
    return []
def fit_to_feat(self, word_v, wchoice):
    """Fit this news item to a feature vocabulary and return its weights.

    The item's words come from ``self.news_to_words()`` and are passed
    through ``common.strip_vec`` before being counted.

    Args:
        word_v: iterable of feature words; the result has one weight per
            entry, in the same order.
        wchoice: weighting scheme. ``NUM`` yields the raw occurrence count
            as a float; ``LOG_NUM`` yields the natural log of the count.

    Returns:
        A list of floats aligned with ``word_v``. Any other ``wchoice``
        value yields an empty list, as in the original implementation.
    """
    from collections import Counter

    words = self.news_to_words()
    # NOTE(review): the stemmed result was never used by the original code;
    # counts are taken on the stripped, unstemmed words. The call is kept in
    # case it has side effects -- confirm whether counting was meant to use
    # the stemmed words instead.
    common.stem_low_wvec(words)

    # Count every word once instead of rescanning the list per feature.
    counts = Counter(common.strip_vec(words))

    if wchoice == NUM:
        return [float(counts[word]) for word in word_v]
    if wchoice == LOG_NUM:
        # math.log(0.0) raises ValueError, which made this mode fail for any
        # feature word missing from the news item. Absent words now weigh
        # 0.0; weights for words that do occur are unchanged.
        # NOTE(review): a count of 1 also maps to 0.0 (log(1)); confirm
        # whether a smoothed form such as log(1 + count) is wanted.
        return [math.log(float(counts[word])) if counts[word] else 0.0
                for word in word_v]
    return []
def gen_word_vec(self, word_v, wchoice=W_CHOICE):
    """Generate this NewsVector's weight vector over the vocabulary ``word_v``.

    The words come from ``self.newsvec_to_words()`` and are passed through
    ``common.strip_vec`` before being counted.

    Args:
        word_v: iterable of feature words; the result has one weight per
            entry, in the same order.
        wchoice: weighting scheme, defaulting to ``W_CHOICE``. ``NUM``
            yields the raw occurrence count as a float; ``LOG_NUM`` yields
            the natural log of the count.

    Returns:
        A list of floats aligned with ``word_v``. Any other ``wchoice``
        value yields an empty list, as in the original implementation.
    """
    from collections import Counter

    words = self.newsvec_to_words()
    # NOTE(review): the stemmed result was never used by the original code;
    # counts are taken on the stripped, unstemmed words. The call is kept in
    # case it has side effects -- confirm whether counting was meant to use
    # the stemmed words instead.
    common.stem_low_wvec(words)

    # One pass over the words replaces a full list scan per feature word.
    occurrences = Counter(common.strip_vec(words))

    if wchoice == NUM:
        return [float(occurrences[word]) for word in word_v]
    if wchoice == LOG_NUM:
        # math.log(0.0) raises ValueError, so the original failed whenever a
        # vocabulary word did not occur in this vector. Absent words now
        # weigh 0.0; weights for words that do occur are unchanged.
        # NOTE(review): a count of 1 also maps to 0.0 (log(1)); confirm
        # whether a smoothed form such as log(1 + count) is wanted.
        return [math.log(float(occurrences[word])) if occurrences[word] else 0.0
                for word in word_v]
    return []
def word_vectors(list):
    """Build word-frequency vectors for a collection of ad vectors.

    All inputs are unioned into one ``adVector.AdVector``. Its words are
    stemmed/lowercased with ``common.stem_low_wvec``, English stop words
    are removed, and the remainder is reduced with ``common.unique_words``
    and ``common.strip_vec`` to form the shared vocabulary. Each input is
    then fitted to that vocabulary with its ``gen_word_vec`` method.

    Args:
        list: iterable of ad vectors. It is traversed twice, so it must be
            re-iterable. Each element needs ``gen_word_vec`` and ``label``.
            (The name shadows the builtin; it is kept so existing callers
            that pass it by keyword keep working.)

    Returns:
        A tuple ``(wv_list, labels, word_v)``: the per-ad weight vectors,
        the matching labels, and the vocabulary used as the feature set.
    """
    ad_union = adVector.AdVector()
    for ads in list:
        ad_union = ad_union.union(ads)

    stemmed_words = common.stem_low_wvec(ad_union.advec_to_words())

    # The original re-evaluated stopwords.words('english') for every word
    # and searched the returned sequence linearly. Load it once into a set
    # so each membership test is a constant-time lookup.
    english_stopwords = set(stopwords.words('english'))
    filtered_words = [w for w in stemmed_words if w not in english_stopwords]

    word_v = common.strip_vec(common.unique_words(filtered_words))

    wv_list = []
    labels = []
    for ads in list:
        wv_list.append(ads.gen_word_vec(word_v))
        labels.append(ads.label)
    return wv_list, labels, word_v  # word_v is returned as the feature set
def word_vectors(list):
    """Return frequency vectors, labels and vocabulary for a list of adVecs.

    Steps, in order:
      1. Union every input into a single ``adVector.AdVector``.
      2. Extract its words and stem/lowercase them via
         ``common.stem_low_wvec``.
      3. Drop English stop words.
      4. Reduce to the vocabulary with ``common.unique_words`` followed by
         ``common.strip_vec``.
      5. Fit every input to that vocabulary with its ``gen_word_vec``.

    Args:
        list: iterable of ad vectors, iterated twice (so a one-shot
            iterator will not work). Each element needs ``gen_word_vec``
            and ``label``. The name shadows the builtin but is kept for
            backward compatibility with existing callers.

    Returns:
        ``(wv_list, labels, word_v)`` -- the weight vector and label of
        each input, in input order, plus the vocabulary used as features.
    """
    ad_union = adVector.AdVector()
    for ads in list:
        ad_union = ad_union.union(ads)

    words = ad_union.advec_to_words()
    stemmed_words = common.stem_low_wvec(words)

    # Hoisted out of the comprehension: the original called
    # stopwords.words('english') once per word and tested membership
    # against the returned sequence. A set built once gives the same
    # filtering with a constant-time lookup per word.
    stop_set = set(stopwords.words('english'))
    filtered_words = [w for w in stemmed_words if w not in stop_set]

    word_v = common.unique_words(filtered_words)
    word_v = common.strip_vec(word_v)

    wv_list = []
    labels = []
    for ads in list:
        wv_list.append(ads.gen_word_vec(word_v))
        labels.append(ads.label)
    return wv_list, labels, word_v  # word_v is returned as the feature set