def convertSingle(self, user_id):
    '''
    Build the feature list for a single user from the accounts they follow.

    @param user_id
        Identifier passed straight to loadFollowingFromDBofUser.
    @return
        A list of (friend, weight) tuples, one per entry returned by
        loadFollowingFromDBofUser, in the same order.
    '''
    friends = loadFollowingFromDBofUser(user_id)
    # Every followed account currently gets the same placeholder weight
    # of 1.  A concrete list (rather than a lazy ``map`` object) is
    # returned so the result can be indexed, measured and iterated more
    # than once on Python 3 as well as Python 2.
    return [(friend, 1) for friend in friends]
def genCoeffTable(self, effectives, instances):
    '''
    Build and normalize a coefficient table from crawled user text.

    For every (user_id, label) instance the crawled text is tokenized and
    each (word, value) pair is recorded in the table against the user's
    following list; the text length is then added for the same label and
    following list.  Instances whose crawled text is None are skipped.

    @param effectives
        2d matrix of effective features (follower ids): the first
        dimension is the label and the second is the follower id.
        Passed unchanged to the CoeffTable constructor.
    @param instances
        An iterable of (user_id, label) pairs.
    @return
        The CoeffTable, after table.normalize() has been called.
    '''
    # The table accumulates the coefficients for all processed users.
    table = CoeffTable(effectives)

    # Each iteration of this loop processes a single user.
    for user_id, label in instances:
        text, length = self._readTextCrawled(user_id)
        if text is None:
            # Nothing was crawled for this user; nothing to record.
            continue

        converter = TextFeatureConverter()
        words_arr = converter.textToSparseTokens(text, False)
        # NOTE: per-user normalization of words_arr by length is not
        # applied here; the table is normalized once after the loop.

        # Materialize the ids as a list: the same sequence is handed to
        # table.record() once per word and to table.addTweetLength(), and
        # a lazy ``map`` object (Python 3) would be exhausted after the
        # first consumer.
        following = [str(followee)
                     for followee in loadFollowingFromDBofUser(user_id)]

        for w, value in words_arr:
            table.record(label, following, w, value)
        # The text length is added once per user.
        table.addTweetLength(label, following, length)

    # Normalize the word counts.
    table.normalize()
    return table