def searching(self, keyword, lang, since, until):
    """Gather stored crawl results for a keyword over a date range into one CSV.

    For every day from ``since`` to ``until`` (both ends included) the
    per-day reader path (the ``Database`` prefix followed by the ISO date)
    is passed to ``ManageFile.find_copy_to`` together with the domain list
    selected by ``lang``. The per-day results are concatenated with
    ``pandas.concat`` and written, without the index, to a file named
    ``<keyword>_cut<lang>.csv`` under ``check.path``.

    Args:
        keyword: Search term; forwarded to ``find_copy_to`` and used in the
            output file name.
        lang: ``"en"``, ``"th"`` or ``"all"``; selects ``self.DOMAIN_en``,
            ``self.DOMAIN_th`` or ``self.DOMAIN`` respectively. For any other
            value no lookup is made and ``None`` is collected for each day.
        since: First day of the range, formatted ``%Y-%m-%d``.
        until: Last day of the range, formatted ``%Y-%m-%d``.

    Raises:
        ValueError: If ``since`` or ``until`` does not match ``%Y-%m-%d``.
            ``pandas.concat`` also raises ``ValueError`` when it is given
            nothing to concatenate (e.g. ``until`` earlier than ``since``).
    """
    print("Start Crawler")
    column = ['time', 'header', 'content', 'link']
    # file_name="" means: do not create a file up front.
    check = ManageFile(
        fold_name="WebCrawler", file_name="", column_data=column, mode="a")

    first_day = datetime.strptime(since, "%Y-%m-%d")
    last_day = datetime.strptime(until, "%Y-%m-%d")
    span = last_day - first_day
    same_day = span == timedelta(days=0)
    # Keep the historical console output: a zero-length range prints "0 day".
    print("0 day" if same_day else span)

    # Name of the attribute holding the domain list for each supported
    # language; resolved lazily so only the requested attribute is read.
    domain_attr = {"en": "DOMAIN_en", "th": "DOMAIN_th", "all": "DOMAIN"}.get(lang)

    frames = []
    # Both dates are parsed at midnight, so span.days is the exact day gap;
    # +1 makes the range inclusive of `until`.
    for offset in range(span.days + 1):
        date = (first_day + timedelta(days=offset)).date().isoformat()
        print(date)
        df = None
        if domain_attr is not None:
            df = check.find_copy_to(
                keyword=keyword,
                reader="Database\\" + date,
                column=["link", "header"],
                condition=[getattr(self, domain_attr), keyword],
                nlp=self.nlp_web,
            )
        frames.append(df)

    if same_day:
        # Single-day ranges always contribute an empty frame with the
        # expected columns alongside the day's result.
        frames.append(pandas.DataFrame(columns=column))

    result = pandas.concat(frames)
    out_path = check.path + "\\" + keyword + "_cut" + lang + ".csv"
    # Context manager guarantees the CSV is flushed and the handle released.
    # NOTE(review): no explicit encoding is given, so the platform default is
    # used - confirm this is intended for non-ASCII content.
    with open(out_path, "w", newline="") as target_file:
        target_file.write(result.to_csv(index=False))
def test_find_copy_tos(self):
    # find_copy_to must hand back an object whose exact type is the one
    # produced by pandas.DataFrame(...); assertIs on the types means a
    # subclass would not satisfy the check.
    manager = ManageFile("Test_write_file", "test", ["a", "b", "c"], "w")
    found = manager.find_copy_to(
        "1",
        "test",
        ["0", "4"],
        ["a", "b"],
        self.nlp,
    )
    expected_type = type(pandas.DataFrame(columns=["a", "b", "c"]))
    self.assertIs(type(found), expected_type)