def pre_process(self):
    """Tokenize every file in ``self.file_list`` and run emotion analysis on each.

    For each file: count tweets and grab its timestamp, lower-case and
    preprocess every line, drop punctuation tokens, then hand the words to
    an ``EAC`` instance for emotion analysis.

    Returns:
        list: the flat list of all kept words across every file (kept for
        testing), or ``None`` if any step raised.
    """
    name = self.name
    file_list = self.file_list
    try:
        # Split text into a list of words
        all_words = []  # renamed from `list_` to avoid shadowing the builtin
        for path in file_list:  # direct iteration instead of a while/index loop
            num_tweets = Functions.num_of_tweets(path)
            time_stamp = Functions.time_stamp(path)
            res = []
            word_list = []
            # `with` guarantees the handle is closed (the original leaked it)
            with open(path, 'r') as target_doc:
                print("FILE: ", target_doc.name)
                for lines in target_doc:
                    # NOTE(review): word_list is reset per line, so EAC below
                    # only sees the LAST line's words — preserved as-is, but
                    # confirm this is intentional.
                    word_list = []
                    line = lines.lower()
                    word = Functions.preprocess(line)
                    for i in word:
                        if i not in punctuation:
                            word_list.append(i)
                            # For testing purposes
                            all_words.append(i)
            emo = EAC(name, word_list, path, time_stamp, num_tweets, res)
            emo.emotion_analysis()
        return all_words
    except Exception as e:
        # Narrowed from BaseException so KeyboardInterrupt/SystemExit propagate.
        print("Pre_process error: ", e)
def pre_process(self, file_list):
    """Tokenize every file in *file_list* and run ``Company.emotion_analysis``.

    For each file: count tweets, grab its timestamp, lower-case and
    preprocess every line, drop punctuation tokens, and forward the words
    to the emotion-analysis step.

    Args:
        file_list: iterable of paths to the files to process.

    Returns:
        list: the flat list of all kept words across every file (kept for
        testing), or ``None`` if any step raised.
    """
    try:
        # Split text into a list of words
        all_words = []  # renamed from `list_` to avoid shadowing the builtin
        for path in file_list:  # direct iteration instead of a while/index loop
            num_tweets = Functions.num_of_tweets(path)
            time_stamp = Functions.time_stamp(path)
            word_list = []
            # BUG FIX: the original called open() TWICE on the same path,
            # leaking the first handle and never closing the second.
            with open(path, 'r') as target_doc:
                for lines in target_doc:
                    # NOTE(review): word_list is reset per line, so only the
                    # LAST line's words reach emotion_analysis — preserved
                    # as-is, but confirm this is intentional.
                    word_list = []
                    line = lines.lower()
                    word = Functions.preprocess(line)
                    for i in word:
                        if i not in punctuation:
                            word_list.append(i)
                            # For testing purposes
                            all_words.append(i)
            Company.emotion_analysis(self, word_list, path, time_stamp, num_tweets)
        print(all_words)
        return all_words
    except Exception as e:
        # Narrowed from BaseException so KeyboardInterrupt/SystemExit propagate.
        print("Pre_process error: ", e)
def emotion_measure(name):
    """Score every tracked JSON file for *name* against the emotion dictionaries.

    Reads each ``Tracker\\<name>\\*.json`` file, preprocesses every line into
    words, looks each word up in every ``Emotion\\*.json`` dictionary
    (appending ``{emotion_id: weight}`` — weight 0 when the word is absent),
    then hands the accumulated results to ``Functions.counting``.

    Args:
        name: subdirectory under ``Tracker`` containing the JSON files.
    """
    # read in any json file that comes in / using glob for filename pattern matching
    json_pattern = os.path.join('Tracker\\' + name, '*.json')
    tracker_files = glob.glob(json_pattern)

    # Load every emotion dictionary ONCE. The original re-globbed the
    # directory and re-opened/re-parsed every emotion JSON for EVERY input
    # line — and never closed those handles (a file-descriptor leak).
    emo_pattern = os.path.join('Emotion\\', '*.json')
    emotion_dicts = []
    for file_emo in glob.glob(emo_pattern):
        with open(file_emo, 'r') as emo_doc:
            emotion_dicts.append(json.load(emo_doc))

    # NOTE: inner glob results no longer rebind the outer loop's variable
    # (the original shadowed `file_list` inside the loop).
    for file in tracker_files:
        print(file)
        res = []
        time_stamp = Functions.time_stamp(file)
        # BUG FIX: num_of_tweets was called twice; the duplicate call's
        # result was discarded.
        num_tweets = Functions.num_of_tweets(file)
        with open(file, 'r') as target_doc:
            for lines in target_doc:
                line = lines.lower()
                word = twitterstreamV2.preprocess(line)
                for load_file in emotion_dicts:
                    for wd in word:
                        # checks if the word exists in the dictionary
                        dict_words = load_file["words"][0]
                        id_ = load_file["id"]
                        # weight from the dictionary, or 0 when absent
                        wrd = dict_words[wd] if wd in dict_words else 0
                        res.append({id_: wrd})
        Functions.counting(res, file, time_stamp, num_tweets, name)