def writeToEventsFile(self, event_clust):
    """Append a summary line for *event_clust* to events/Events.txt and
    dump the cluster's tweets to events/cluster_<id>.csv.

    The summary line holds the cluster id followed by the 10 most
    common tokens (with counts) across all tweets in the cluster.
    Assumes event_clust has a ``.id`` and a ``.data`` DataFrame with a
    'tweet_text' column -- TODO confirm against the caller.
    """
    print("Writing Events to a file")
    # Gather every token from every tweet in the cluster.
    words_list = []
    for index, row in event_clust.data.iterrows():
        words_list += Cosine_Sim.tokenize_only(row['tweet_text'])
    most_common = Counter(words_list).most_common(10)
    # 'with' guarantees the file is closed even if a write raises;
    # the original relied on an explicit close() that an exception
    # would have skipped.
    with open("events/Events.txt", "a") as text_file:
        text_file.write("Cluster Id =" + str(event_clust.id) + " ,")
        for word, count in most_common:
            text_file.write("{0} : {1} ,".format(word, count))
        text_file.write("\n")
    # write cluster to csv file. to_csv overwrites an existing file,
    # so the original os.path.exists()/os.remove() pair was redundant.
    clust_file = 'events/cluster_' + str(event_clust.id) + '.csv'
    try:
        event_clust.data.to_csv(clust_file, index=False, encoding='utf-8')
    except Exception as err:
        # Narrowed from a bare 'except:' (which also trapped
        # KeyboardInterrupt/SystemExit) and now reports the cause
        # instead of failing silently; the write stays best-effort.
        print(" Error writing the Event File: %s" % err)
# Re-tokenize every clustered tweet into its own numbered file under
# \tweets_avg, then record which sequential tweet numbers came from
# which cluster CSV in \tweets_avg\tweet_clusters.txt.
cnt = 1
cluster_ids = []
files = glob.glob("\clusters_AvgSimilariy\*.csv")
for f in files:
    print("working on file , %s" % f)
    data = pd.read_csv(f, encoding='utf-8')
    # Tweet numbers for this cluster; joined once at the end instead of
    # the original quadratic string concatenation. Result is identical:
    # "<file>,<n1>,<n2>,...,"
    members = [f]
    for index, row in data.iterrows():
        print("working on tweet %s" % index)
        words_list = Cosine_Sim.tokenize_only(row['tweet_text'])
        # 'with' closes the per-tweet file even if the write raises;
        # the original's manual close() would have been skipped.
        with open(r"\tweets_avg\tweet" + str(cnt) + ".txt", "w") as text_file:
            text_file.write(" ".join(words_list))
        members.append(str(cnt))
        cnt = cnt + 1
    cluster_ids.append(",".join(members) + ",")
# One membership line per cluster file.
with open(r"\tweets_avg\tweet_clusters.txt", "w") as text_file:
    for line in cluster_ids:
        text_file.write(line)
        text_file.write("\n")