Пример #1
0
    def go_twitter(self, lang, count, keyword, since, until):
        """Call ``self.main_twitter`` for the days of [since, until] not yet cached.

        Reads the ``keyword + "_Ncut" + lang`` file from the "Twitter" folder,
        collects the days that already have rows between ``since`` and
        ``until``, clamps the requested range to the last seven days
        (today inclusive), and then calls ``self.main_twitter`` once for every
        gap of days that is not present in the file.

        Args:
            lang: Forwarded to ``self.main_twitter``; also part of the file name.
            count: Forwarded unchanged to ``self.main_twitter``.
            keyword: Forwarded to ``self.main_twitter``; also part of the file name.
            since: First requested day, formatted ``"%Y-%m-%d"``.
            until: Last requested day, formatted ``"%Y-%m-%d"``.

        Returns:
            None. The method returns early without calling ``main_twitter``
            when ``until`` is before ``since`` or when the whole range lies
            outside the seven-day window.
        """
        date_format = "%Y-%m-%d"
        one_day = timedelta(days=1)
        search_window = timedelta(days=7)
        try:
            print("=" * 10 + "Start Find Twitter" + "=" * 10)
            read_data = ManageFile("Twitter", keyword + "_Ncut" + lang,
                                   ["time", "content", "places"], "r")

            csv_data = read_data.managefile_main()
            df_in = pandas.DataFrame(csv_data)

            # Which days are already present in the file?  Column 0 holds
            # "<date> <time>" strings, compared lexicographically.
            timestamps = df_in[0]
            in_range = ((timestamps >= f"{since} 00:00:00")
                        & (timestamps <= f"{until} 23:59:59"))
            cached_days = sorted(
                {
                    datetime.strptime(stamp.split(" ")[0], date_format)
                    for stamp in timestamps[in_range]
                },
                reverse=True,
            )

            # -------------------- set since and until time -----------------------
            today = datetime.now().replace(hour=0, minute=0, second=0,
                                           microsecond=0)
            earliest = today - search_window

            until_dt = datetime.strptime(until, date_format)
            since_dt = datetime.strptime(since, date_format)
            if until_dt < since_dt:
                return None
            if until_dt > today and since_dt > today:
                # The whole range lies in the future.
                return None
            if until_dt < earliest and since_dt < earliest:
                # The whole range is older than the search window.
                return None

            # Clamp both ends into [earliest, today]; a bound that is already
            # inside the window keeps the caller's original string.
            if until_dt < today:
                until_new = until
            else:
                until_new = today.strftime(date_format)
                until_dt = today
            if since_dt > earliest:
                since_new = since
            else:
                since_new = earliest.strftime(date_format)
                since_dt = earliest
            # ---------------------------------------------------------------------

            # --------------------- if can't find data ------------------
            if not cached_days:
                print(since_new, until_new, "DO IT")
                self.main_twitter(lang, count, keyword, since_new, until_new)
                return None
            # --------------------------------------------------------

            # Only the time period that the program can search.
            searchable = [
                day for day in cached_days if earliest <= day <= today
            ]

            # -------------------------------- find starting time -------------------
            # If the until day itself is not cached, start one day later so
            # that the first gap computed below includes the until day.
            if until_dt in searchable:
                point = until_dt
            else:
                point = until_dt + one_day
            # -----------------------------------------------------------------------

            # ------------------------------- find ending time ---------------------
            # If the since day is not cached, add a sentinel one day earlier so
            # that the last gap computed below includes the since day.
            # (Compared as datetimes: the list never contains strings.)
            if since_dt not in searchable:
                searchable.append(since_dt - one_day)
            # ----------------------------------------------------------------------

            # ------------------------ find specific time --------------------------
            # Walk the cached days from newest to oldest; every run of days
            # strictly between two neighbours is a gap that must be fetched.
            for point_stop in searchable:
                start = point - one_day
                stop = point_stop + one_day
                if start >= stop:
                    start = str(start).split(" ")[0]
                    stop = str(stop).split(" ")[0]
                    print(start, stop, "DO IT")
                    self.main_twitter(lang, count, keyword, stop, start)
                else:
                    print(start, stop, "DO NOT DO IT")

                point = point_stop
            # ----------------------------------------------------------------------
        except IndexError:
            # Best-effort, kept from the original: an IndexError raised while
            # reading or scanning the cached file silently ends the run.
            pass
Пример #2
0
    def cut_text(self, folder, keyword, column, lang, since, until):
        """Tokenise and score the rows of a source's "_cut" file and rank its words.

        For the "Twitter" folder the ``keyword + "_Ncut" + lang`` file is first
        filtered with ``self.read_time(folder, frame, since, until)`` and the
        result is written to ``keyword + "_cut" + lang + ".csv"`` next to it.
        The "_cut" file is then read row by row (the first row is skipped);
        each text is passed to ``self.nlp_main.main_nlp`` and
        ``self.sentiment_text``, the sentiment results are appended to
        ``self.array_sentiment`` (and to the per-source list), and the tokens
        are counted.  Up to eleven of the most frequent tokens, excluding the
        keyword itself, are written to the two "GUI_show" ranking files.

        Args:
            folder: Source name; "WebCrawler" and "Twitter" get special handling.
            keyword: Search keyword; part of every file name used here.
            column: Column list handed to ``ManageFile`` for the input files.
            lang: Language tag; part of the file names and passed to
                ``self.sentiment_text``.
            since: Lower time bound, forwarded to ``self.read_time`` (Twitter only).
            until: Upper time bound, forwarded to ``self.read_time`` (Twitter only).

        Returns:
            None.
        """
        cut_name = keyword + "_cut" + lang

        # -----------------------read file for content-----------------------
        if folder == "Twitter":
            # Take the time-filtered rows and write them to a temporary
            # "_cut" file so every source is read the same way below.
            read_data = ManageFile(folder, keyword + "_Ncut" + lang, column,
                                   "r")
            pd_data = pandas.DataFrame(read_data.managefile_main())
            data_ = self.read_time(folder, pd_data, since, until)
            data_str = data_.to_csv(index=False)
            # "with" guarantees the handle is closed even if the write fails.
            with open(read_data.path + "\\" + cut_name + ".csv",
                      "w",
                      newline="") as write_file:
                write_file.write(data_str)

        read = ManageFile(folder, cut_name, column, "r")
        data = read.managefile_main()
        write_sort_text = ManageFile(
            "GUI_show", keyword + "_ranking_" + str(folder).lower() + lang,
            ["keyword", "number"], "w")
        write_sort_text_all = ManageFile("GUI_show",
                                         keyword + "_ranking_all" + lang,
                                         ["keyword", "number"], "w")
        # -------------------------------------------------------------------

        # Index of the text column inside each row.
        if folder == "WebCrawler":
            column_section = 2
        elif folder == "Twitter":
            column_section = 1
        else:
            column_section = 0

        print(
            "*****************************************" + folder +
            " Start SENTIMENT & NLP*****************************************")
        sort_dict = Counter()
        rows_seen = 0
        start = time.time()
        for rows_seen, row in enumerate(data, start=1):
            if rows_seen == 1:
                # The first row is skipped (presumably the CSV header).
                continue

            # (1) cut text by nlp and do sentiment at the same time
            text = row[column_section]
            cut1 = self.nlp_main.main_nlp(text)

            # NOTE(review): sentiment_text runs twice per row with identical
            # arguments; kept as-is because its side effects and the
            # mutability of its result are not visible from here.
            if folder == "WebCrawler":
                self.array_sentiment_web.append(
                    self.sentiment_text(cut1, text, lang))
            elif folder == "Twitter":
                self.array_sentiment_twi.append(
                    self.sentiment_text(cut1, text, lang))
            self.array_sentiment.append(self.sentiment_text(cut1, text, lang))
            print(len(self.array_sentiment))
            sort_dict.update(cut1)
        print(
            rows_seen,
            time.time() - start,
            "*****************************************" + folder +
            " END SENTIMENT & NLP*****************************************")
        print("ALL: " + str(len(self.array_sentiment)) + ", Twitter:" +
              str(len(self.array_sentiment_twi)) + ", WebCrawler:" +
              str(len(self.array_sentiment_web)))

        # (2) sort words and write the files used for display in the GUI.
        # Compare case-insensitively on both sides so a keyword containing
        # capital letters is still left out of its own ranking.
        keyword_lower = keyword.lower()
        for word, occurrences in sort_dict.most_common(11):
            if word.lower() != keyword_lower:
                write_sort_text.managefile_main([word, occurrences])
                write_sort_text_all.managefile_main([word, occurrences])
Пример #3
0
import unittest

from NLP_4test import NLP
from manage_file import ManageFile

import traceback
import emoji
import csv
import time
from datetime import datetime, timedelta
from pythainlp.corpus.common import thai_words
import pandas
from bs4 import BeautifulSoup
import random
# Module-level ManageFile for "Test_write_file"/"test" with columns 0, 1, 2,
# created once at import time.
# NOTE(review): "a" is presumably append mode — confirm against ManageFile.
writefile = ManageFile("Test_write_file", "test", [0, 1, 2], "a")


class Test(unittest.TestCase):
    def setUp(self):
        """Build a fresh NLP instance and record a timestamp before each test."""
        self.nlp = NLP()
        # Taken after NLP() is constructed, so construction time is excluded.
        # NOTE(review): presumably used to report per-test elapsed time — confirm.
        self.start = time.time()

    # **********************************************write file**********************************************
    def test_managefile_main(self):
        row_len_old = -1
        read = open("Test_write_file/" + "test" + ".csv", "r")
        reader = csv.reader((line.replace('\0', '') for line in read),
                            delimiter=",")
        for i in reader:
            row_len_old += 1