def _parse_date(self, text: str):
    """Extract a date written with a Thai month name from ``text``.

    ``self.regex_date`` is searched in ``text``; its three capture groups are
    read as day, Thai month name and year. ``self._year_difference_conversion``
    is subtracted from the captured year before the date is built
    (presumably the Buddhist-era to Gregorian offset -- confirm against the
    class definition).

    Args:
        text: Text expected to contain a date matching ``self.regex_date``.

    Returns:
        The result of ``clean_date`` applied to the parsed ``datetime``.

    Raises:
        ValueError: If ``self.regex_date`` does not match ``text``.
        KeyError: If the captured month name is not a known Thai month.
    """
    thai_months = {
        "มกราคม": 1,
        "กุมภาพันธ์": 2,
        "มีนาคม": 3,
        "เมษายน": 4,
        "พฤษภาคม": 5,
        "มิถุนายน": 6,
        "กรกฎาคม": 7,
        "สิงหาคม": 8,
        "กันยายน": 9,
        "ตุลาคม": 10,
        "พฤศจิกายน": 11,
        "ธันวาคม": 12,
    }
    date_raw = re.search(self.regex_date, text)
    if date_raw is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError(
            f"No date matching {self.regex_date!r} found in text: {text!r}"
        )
    day = clean_count(date_raw.group(1))
    # Month names may arrive with SARA AM (U+0E33) where SARA AA (U+0E32) is
    # expected. No Thai month name contains SARA AM, so folding it to SARA AA
    # is lossless and makes the lookup tolerant for all twelve months.
    month_name = date_raw.group(2).replace("\u0e33", "\u0e32")
    month = thai_months[month_name]
    year = clean_count(date_raw.group(3)) - self._year_difference_conversion
    return clean_date(datetime(year, month, day))
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``week`` column of ``df`` with a ``date`` column.

    Each ``week`` value is parsed with ``DATE_FORMAT``, moved 14 days forward
    and then capped at ``self._parse_last_update_date`` before being passed
    through ``clean_date``.

    Args:
        df: Frame holding a ``week`` column parseable with ``DATE_FORMAT``.

    Returns:
        A new frame with a ``date`` column added and ``week`` dropped.
    """
    shifted = pd.to_datetime(df.week, format=DATE_FORMAT) + timedelta(days=14)
    # NOTE(review): read as an attribute, not called -- presumably a property
    # yielding a ``date``; confirm against the class definition.
    cap = self._parse_last_update_date

    def _capped(stamp):
        # Never report a date later than the last update.
        return clean_date(min(stamp.date(), cap), DATE_FORMAT)

    with_date = df.assign(date=shifted.apply(_capped))
    return with_date.drop(columns=["week"])
def _propose_df(self):
    """Collect tweets announcing a numbered "Comunicado" into a DataFrame.

    Iterates ``self.tweets`` in order. For every tweet whose ``full_text``
    matches ``Comunicado N° <3-4 digits>``, the creation time is converted to
    the ``America/Panama`` timezone and cleaned with ``clean_date``. Iteration
    stops as soon as ``self.stop_search`` returns a truthy value for that
    date. Each retained tweet is also appended to ``self.tweets_relevant``.

    Returns:
        A DataFrame with columns ``date``, ``text``, ``source_url`` and
        ``num``, de-duplicated on ``num`` keeping the last occurrence. The
        frame is empty (but still has these columns) when no tweet matches.
    """
    columns = ["date", "text", "source_url", "num"]
    regex = r"Comunicado N° (\d{3,4}).*"
    data = []
    for tweet in self.tweets:
        match = re.search(regex, tweet.full_text)
        if not match:
            continue
        dt = clean_date(from_tz_to_tz(tweet.created_at, to_tz="America/Panama"))
        if self.stop_search(dt):
            break
        data.append(
            {
                "date": dt,
                "text": tweet.full_text,
                "source_url": self.build_post_url(tweet.id),
                "num": match.group(1),
            }
        )
        self.tweets_relevant.append(tweet)
    # Explicit columns keep the schema stable when ``data`` is empty; without
    # them ``drop_duplicates(subset=["num"])`` raises KeyError on an empty frame.
    df = pd.DataFrame(data, columns=columns)
    return df.drop_duplicates(subset=["num"], keep="last")
def _weekday_to_date(self, d):
    """Turn a ``%Y-W%W`` week label into a concrete date.

    The label is first resolved to weekday 5 of that week (``%w``: Friday).
    If that date compares greater than ``localdate("Europe/London")``,
    weekday 2 (Tuesday) of the same week is used instead.

    Args:
        d: Week label string in ``%Y-W%W`` form, to which the weekday suffix
            is appended.

    Returns:
        The value produced by ``clean_date`` for the chosen weekday.
    """
    week_fmt = "%Y-W%W+%w"
    late_week = clean_date(d + "+5", week_fmt)
    if late_week > localdate("Europe/London"):
        # Weekday 5 lies after the current local date; use weekday 2 instead.
        return clean_date(d + "+2", week_fmt)
    return late_week
def date_limit_one_dose_ddmmyyyy(self):
    """Return ``self.date_limit_one_dose`` reformatted as ``%d%m%Y``.

    The stored value is parsed with the ``%Y-%m-%d`` format and re-emitted by
    ``clean_date`` using ``output_fmt="%d%m%Y"``.
    """
    iso_limit = self.date_limit_one_dose
    return clean_date(iso_limit, "%Y-%m-%d", output_fmt="%d%m%Y")
def _get_num_gap_days(self, df_current):
    """Return the number of whole days between now and the newest data date.

    Args:
        df_current: Object exposing a ``date`` attribute whose ``max()`` is a
            ``%Y-%m-%d`` string (presumably a DataFrame -- confirm with caller).

    Returns:
        The ``days`` component of ``now - latest_date``.
    """
    now = localdatenow(tz=None, as_datetime=True)
    latest = clean_date(df_current.date.max(), "%Y-%m-%d", as_datetime=True)
    elapsed = now - latest
    return elapsed.days
def _parse_date_from_link_title(self, title):
    """Build a date from the day and year found in a link title.

    The day and a four-digit year starting with ``25`` are captured from
    ``title``. ``self._year_difference_conversion`` is subtracted from the
    year. The month is not read from the title; it comes from
    ``self._current_month``.

    Args:
        title: Link title expected to contain the Thai summary phrase
            followed by a day number and a ``25xx`` year.

    Returns:
        The result of ``clean_date`` for the ``year-month-day`` string parsed
        with ``%Y-%m-%d``.

    Raises:
        ValueError: If ``title`` does not match the expected pattern.
    """
    match = re.search(r".*สรุปวัคซีน ประจำวันที่\s+(\d+) .* (25\d\d)", title)
    if match is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise ValueError(f"Could not find a date in link title: {title!r}")
    day, raw_year = match.group(1, 2)
    year = int(raw_year) - self._year_difference_conversion
    return clean_date(f"{year}-{self._current_month}-{day}", "%Y-%m-%d")
def _week_to_date(self, week: int) -> str:
    """Convert a week number of the current year into a date.

    The year is the ISO-calendar year of the current date in ``Asia/Amman``;
    the date returned is weekday 5 (``%w``: Friday) of week ``week``
    interpreted with ``%W``.

    Args:
        week: Week number, interpolated into the string handed to ``clean_date``.

    Returns:
        The value produced by ``clean_date`` for ``"<year> <week> +5"``.
    """
    today = localdate("Asia/Amman", as_datetime=True)
    # NOTE(review): the ISO year is combined with ``%W`` (not ISO ``%V``) week
    # numbering -- confirm this is intended around year boundaries.
    iso_year = today.isocalendar().year
    return clean_date(f"{iso_year} {week} +5", "%Y %W +%w")