def go_twitter(self, lang, count, keyword, since, until):
    """Fetch the Twitter data missing from the cached CSV for [since, until].

    Reads the cached "<keyword>_Ncut<lang>" CSV, finds which dates in the
    requested range are already present, clamps the range to the last 7 days
    (the period the search backend can cover), and calls
    ``self.main_twitter`` once per missing date window.

    Args:
        lang: language suffix used in the cache file name.
        count: tweet count passed through to ``main_twitter``.
        keyword: search keyword / cache file name stem.
        since: range start, "YYYY-MM-DD" string.
        until: range end, "YYYY-MM-DD" string.

    Returns:
        None.  Side effect only: one ``self.main_twitter`` call per gap.
    """
    try:
        print("=" * 10 + "Start Find Twitter" + "=" * 10)
        # Load the cached (uncut) Twitter CSV for this keyword/language.
        read_data = ManageFile("Twitter", keyword + "_Ncut" + lang,
                               ["time", "content", "places"], "r")
        csv_data = read_data.managefile_main()
        df_in = pandas.DataFrame(csv_data)
        # Which days already exist in the file within [since, until]?
        condition1 = (df_in[0] >= f"{since} 00:00:00")
        condition2 = (df_in[0] <= f"{until} 23:59:59")
        temp = []  # distinct date strings already present in the file
        # .apply() is used for its side effect only: collect each unique
        # date into `temp` (the returned Series is discarded).
        df_in[0][condition1 & condition2].str.split(" ").apply(
            lambda x: temp.append(x[0]) if x[0] not in temp else None)
        for i in range(len(temp)):
            temp[i] = datetime.strptime(str(temp[i]), "%Y-%m-%d")
        temp.sort(reverse=True)  # newest cached date first

        # -------------------- set since and until time -----------------------
        # Clamp the requested range to the searchable window
        # [today - 7 days, today].
        now = datetime.now()
        past = now - timedelta(days=7)
        now = datetime.strptime(str(now).split(" ")[0], "%Y-%m-%d")
        past = datetime.strptime(str(past).split(" ")[0], "%Y-%m-%d")
        until_new = until
        since_new = since
        temp_until = datetime.strptime(until_new, "%Y-%m-%d")
        temp_since = datetime.strptime(since_new, "%Y-%m-%d")
        if (temp_until >= temp_since):
            # set until date
            if (temp_until > now and temp_since > now):
                return None  # entire range lies in the future
            else:
                if (now > temp_until):
                    until_new = until_new
                else:
                    until_new = str(now).split(" ")[0]  # clamp to today
            # set since date
            if (temp_until < past and temp_since < past):
                return None  # entire range is older than the 7-day window
            else:
                if (past < temp_since):
                    since_new = since_new
                else:
                    since_new = str(past).split(" ")[0]  # clamp to -7 days
        else:
            return None  # inverted range: until earlier than since
        # ---------------------------------------------------------------------

        # --------------------- if can't find data ------------------
        if (temp == []):
            # Nothing cached for this range: fetch the whole clamped window.
            print(since_new, until_new, "DO IT")
            self.main_twitter(lang, count, keyword, since_new, until_new)
            return None
        # --------------------------------------------------------

        # Only the time period that the backend can actually search:
        # keep cached dates inside the last 7 days.
        new_array = []
        end = None
        for k in temp:
            if (k <= now and k >= now - timedelta(days=7)):
                new_array.append(k)

        # -------------------------------- find starting time -------------------
        point = None
        if (datetime.strptime(until_new, "%Y-%m-%d") not in new_array):
            # +1 day as the reference point so the first gap scan below
            # starts at until_new itself.
            point = datetime.strptime(until_new, "%Y-%m-%d") + timedelta(days=1)
        else:
            point = datetime.strptime(until_new, "%Y-%m-%d")
        point = point.strftime("%Y-%m-%d")
        point = datetime.strptime(point, "%Y-%m-%d")
        # -----------------------------------------------------------------------

        # ------------------------------- find ending time ---------------------
        # BUG FIX: compare datetimes with datetimes.  The original tested
        # `since_new not in new_array`, i.e. a str against datetime objects,
        # which is always True, so the sentinel was appended even when the
        # since-date was already cached.
        if (datetime.strptime(since_new, "%Y-%m-%d") not in new_array):
            # Append a sentinel one day before since_new so the gap scan
            # below also covers the oldest missing day.
            end = datetime.strptime(since_new, "%Y-%m-%d") - timedelta(days=1)
            new_array.append(end)
        # ----------------------------------------------------------------------

        # ------------------------ find specific time --------------------------
        # Walk cached dates newest -> oldest; each gap between consecutive
        # cached dates becomes a (stop, start) window to fetch.
        for point_stop in new_array:
            start = point - timedelta(days=1)
            stop = point_stop + timedelta(days=1)
            if (start >= stop):
                start = str(start).split(" ")[0]
                stop = str(stop).split(" ")[0]
                print(start, stop, "DO IT")
                self.main_twitter(lang, count, keyword, stop, start)
            else:
                print(start, stop, "DO NOT DO IT")
            point = point_stop
        # ----------------------------------------------------------------------
    except IndexError:
        # NOTE(review): best-effort guard — an IndexError anywhere above
        # (presumably from an empty/short CSV) is silently ignored.
        # Confirm the intent and narrow the try body if possible.
        pass
def cut_text(self, folder, keyword, column, lang, since, until):
    """Tokenize ("cut") the cached text for `keyword`, run sentiment per row,
    and write word-frequency ranking files for the GUI.

    Args:
        folder: "WebCrawler" or "Twitter" — selects which cache to read.
        keyword: file-name stem of the cached data.
        column: column-header list passed to ManageFile.
        lang: language suffix used in file names.
        since: "YYYY-MM-DD" lower bound (used only for the Twitter branch).
        until: "YYYY-MM-DD" upper bound (used only for the Twitter branch).
    """
    # -----------------------read file for content-----------------------
    # Open the (time-filtered) file whose text will be tokenized.
    read = None
    if (folder == "WebCrawler"):
        read = ManageFile(folder, keyword + "_cut" + lang, column, "r")
    elif (folder == "Twitter"):
        read_data = ManageFile(folder, keyword + "_Ncut" + lang, column, "r")
        # ----------------------- read the file into pandas -----------------------
        csv_data = read_data.managefile_main()
        pd_data = pandas.DataFrame(csv_data)
        # --------------------------------------------------------------
        # ----------------------- select the time range -----------------------
        data_ = self.read_time(folder, pd_data, since, until)
        # -----------------------------------------------------
        # ----------------------- write a temporary file -----------------------
        data_str = data_.to_csv(index=False)
        # NOTE(review): Windows-only "\\" separator — consider os.path.join.
        # `with` closes the handle even if the write raises (the original
        # open()/close() pair leaked the handle on error).
        with open(read_data.path + "\\" + keyword + "_cut" + lang + ".csv",
                  "w", newline="") as write_file:
            write_file.write(data_str)
        # -----------------------------------------------------------
        read = ManageFile(folder, keyword + "_cut" + lang, column, "r")
    else:
        read = ManageFile(folder, keyword + "_cut" + lang, column, "r")
    data = read.managefile_main()
    write_sort_text = ManageFile(
        "GUI_show", keyword + "_ranking_" + str(folder).lower() + lang,
        ["keyword", "number"], "w")
    write_sort_text_all = ManageFile("GUI_show",
                                     keyword + "_ranking_all" + lang,
                                     ["keyword", "number"], "w")
    # -------------------------------------------------------------------
    # ------------------------------column-------------------------------
    # Index of the text column differs per source.
    column_section = 0
    if (folder == "WebCrawler"):
        column_section = 2
    elif (folder == "Twitter"):
        column_section = 1
    # -------------------------------------------------------------------
    print(
        "*****************************************" + folder +
        " Start SENTIMENT & NLP*****************************************")
    sort_dict = Counter()
    first = 0
    start = time.time()
    for i in data:
        # (1) cut text by nlp and do sentiment in the same time
        if (first > 0):  # row 0 is the CSV header — skip it
            cut1 = self.nlp_main.main_nlp(i[column_section])
            # NOTE(review): sentiment_text is evaluated twice per row
            # (per-source bucket + combined list) — hoist into one call
            # if it is confirmed side-effect-free.
            if (folder == "WebCrawler"):
                self.array_sentiment_web.append(
                    self.sentiment_text(cut1, i[column_section], lang))
            elif (folder == "Twitter"):
                self.array_sentiment_twi.append(
                    self.sentiment_text(cut1, i[column_section], lang))
            self.array_sentiment.append(
                self.sentiment_text(cut1, i[column_section], lang))
            print(len(self.array_sentiment))
            sort_dict += Counter(cut1)  # accumulate word frequencies
        first += 1
    print(
        first, time.time() - start,
        "*****************************************" + folder +
        " END SENTIMENT & NLP*****************************************")
    print("ALL: " + str(len(self.array_sentiment)) + ", Twitter:" +
          str(len(self.array_sentiment_twi)) + ", WebCrawler:" +
          str(len(self.array_sentiment_web)))
    # (2) sort word and write file that can use for show in GUI
    # Top 11 words by frequency, excluding the search keyword itself.
    for w in sorted(sort_dict, key=sort_dict.get, reverse=True)[:11]:
        if (w.lower() != keyword):
            write_sort_text.managefile_main([w, sort_dict[w]])
            write_sort_text_all.managefile_main([w, sort_dict[w]])
import unittest from NLP_4test import NLP from manage_file import ManageFile import traceback import emoji import csv import time from datetime import datetime, timedelta from pythainlp.corpus.common import thai_words import pandas from bs4 import BeautifulSoup import random writefile = ManageFile("Test_write_file", "test", [0, 1, 2], "a") class Test(unittest.TestCase): def setUp(self): self.nlp = NLP() self.start = time.time() # **********************************************write file********************************************** def test_managefile_main(self): row_len_old = -1 read = open("Test_write_file/" + "test" + ".csv", "r") reader = csv.reader((line.replace('\0', '') for line in read), delimiter=",") for i in reader: row_len_old += 1