Пример #1
0
    def searching(self, keyword, lang, since, until):
        """Gather stored rows for ``keyword`` over a date range into one CSV.

        For every day from ``since`` to ``until`` (both inclusive) this calls
        ``ManageFile.find_copy_to`` on the reader ``"Database\\<YYYY-MM-DD>"``
        and concatenates the per-day results. The combined frame is written
        to ``<check.path>\\<keyword>_cut<lang>.csv``.

        Args:
            keyword: Search term; passed to ``find_copy_to`` and used in the
                output file name.
            lang: ``"en"``, ``"th"`` or ``"all"``; selects ``self.DOMAIN_en``,
                ``self.DOMAIN_th`` or ``self.DOMAIN`` as the first condition.
                Any other value contributes ``None`` for each day, which
                ``pandas.concat`` drops (and rejects if nothing else is left).
            since: First day, formatted ``YYYY-MM-DD``.
            until: Last day, formatted ``YYYY-MM-DD``.

        Raises:
            ValueError: If a date does not match ``%Y-%m-%d``, if ``until``
                is earlier than ``since``, or if ``pandas.concat`` receives
                only ``None`` entries.
        """
        print("Start Crawler")
        column = ['time', 'header', 'content', 'link']
        check = ManageFile(
            fold_name="WebCrawler", file_name="", column_data=column,
            mode="a")  # file_name="" means: do not create a file up front.
        temp_until = datetime.strptime(until, "%Y-%m-%d")
        temp_since = datetime.strptime(since, "%Y-%m-%d")

        dif = temp_until - temp_since
        same_day = dif == timedelta(days=0)
        print("0 day" if same_day else dif)

        # Both bounds are parsed at midnight, so the difference is a whole
        # number of days; +1 makes the range inclusive of ``until``.
        day_count = dif.days + 1
        if day_count <= 0:
            raise ValueError(
                "until (%s) must not be earlier than since (%s)"
                % (until, since))

        # Attribute holding the domain filter for each supported language.
        # Resolved lazily so only the attribute that is needed gets read.
        domain_attr = {
            "en": "DOMAIN_en",
            "th": "DOMAIN_th",
            "all": "DOMAIN",
        }.get(lang)

        array = []
        for offset in range(day_count):
            date = (temp_since + timedelta(days=offset)).date().isoformat()
            print(date)
            df = None
            if domain_attr is not None:
                df = check.find_copy_to(
                    keyword=keyword,
                    reader="Database\\" + date,
                    column=["link", "header"],
                    condition=[getattr(self, domain_attr), keyword],
                    nlp=self.nlp_web)
            array.append(df)
        if same_day:
            # Keeps the header row in the output for a single-day query.
            array.append(pandas.DataFrame(columns=column))

        result = pandas.concat(array)
        # Context manager guarantees the CSV is flushed and the handle closed.
        with open(check.path + "\\" + keyword + "_cut" + lang + ".csv",
                  "w",
                  newline="") as target_file:
            target_file.write(result.to_csv(index=False))
Пример #2
0
    def test_find_copy_tos(self):
        """``find_copy_to`` returns a pandas DataFrame."""
        writefile = ManageFile("Test_write_file", "test", ["a", "b", "c"], "w")
        df = writefile.find_copy_to("1", "test", ["0", "4"], ["a", "b"],
                                    self.nlp)

        # assertIsInstance reports the actual type on failure and avoids
        # building a throwaway DataFrame just to obtain its class.
        self.assertIsInstance(df, pandas.DataFrame)