def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url: str) -> List[Claim]:
    """Build a ``Claim`` from a parsed checkyourfact.com review page.

    The title, body and outgoing links are read from the page's
    ``<article>`` element, the date from the first three path segments of
    ``url``, the author from the first ``detail > article > author`` element
    carrying a ``data-slug`` attribute, the rating from the "Verdict" span,
    and the tags from the ``article:tag`` meta elements.

    Args:
        parsed_claim_review_page: Parsed HTML of the review page.
        url: URL the page was fetched from.

    Returns:
        A one-element list holding the claim, or an empty list when the
        page has no ``<article>``/``<h1>`` or no rating could be extracted.
    """
    claim = Claim()
    claim.set_url(url)
    claim.set_source("checkyourfact")

    # Title, body and links all live in the <article>; look it up once and
    # give up on the page when the expected structure is missing.
    article = parsed_claim_review_page.find("article")
    if article is None:
        return []
    title = article.find("h1")
    if title is None:
        return []
    claim.set_title(title.text.replace("FACT CHECK: ", ""))

    # The URL path starts with the publication date: /<a>/<b>/<c>/<slug>.
    url_date = url.replace("https://checkyourfact.com/", "").replace("/", " ").split(" ")
    if len(url_date) >= 3:
        claim.set_date("-".join(url_date[:3]))

    # author & author_url
    for author in parsed_claim_review_page.select("detail > article > author"):
        # hasattr() inspects Python attributes, not HTML attributes, so it
        # cannot detect a missing data-slug; Tag.has_attr() can.
        if author.has_attr("data-slug"):
            claim.author = author.text.split("|")[0].strip().split("\n")[0]
            claim.author_url = "https://checkyourfact.com/author/" + author["data-slug"]
            break

    # body
    claim.set_body(article.get_text())

    # related links
    claim.set_refered_links(
        [link["href"] for link in article.find_all("a", href=True)])

    claim.set_claim(claim.title)

    # rating: the text after the last ":" of the first "Verdict" match
    # (presumably find_by_text returns a list of matching <span> elements —
    # verify against its definition).
    rating = find_by_text(parsed_claim_review_page, "Verdict", "span")
    if rating:
        claim.set_rating(rating[0].text.split(":")[-1].strip())

    # tags: skip meta elements that carry no content attribute
    tags = [
        tag["content"]
        for tag in parsed_claim_review_page.find_all(
            "meta", {"property": "article:tag"})
        if tag.has_attr("content")
    ]
    claim.set_tags(", ".join(tags))

    # A claim without a rating is not reported.
    if not claim.rating:
        return []
    return [claim]
def extract_claim_and_review(self, parsed_claim_review_page: BeautifulSoup, url: str) -> List[Claim]:
    """Build a ``Claim`` from a parsed checkyourfact.com review page.

    The title, body and outgoing links are read from the page's
    ``<article>`` element, the date from the first three path segments of
    ``url``, the verdict (stored as the claim's alternate name) from the
    "Verdict" span, and the tags from the ``article:tag`` meta elements.

    Args:
        parsed_claim_review_page: Parsed HTML of the review page.
        url: URL the page was fetched from.

    Returns:
        A one-element list holding the claim, or an empty list when the
        page has no ``<article>``/``<h1>`` or no verdict could be extracted.
    """
    claim = Claim()
    claim.set_url(url)
    claim.set_source("checkyourfact")

    # Title, body and links all live in the <article>; look it up once and
    # give up on the page when the expected structure is missing.
    article = parsed_claim_review_page.find("article")
    if article is None:
        return []
    title = article.find("h1")
    if title is None:
        return []
    claim.set_title(title.text.replace("FACT CHECK: ", ""))

    # The URL path starts with the publication date: /<a>/<b>/<c>/<slug>.
    url_date = url.replace("https://checkyourfact.com/", "").replace("/", " ").split(" ")
    if len(url_date) >= 3:
        claim.set_date("-".join(url_date[:3]))

    # body
    claim.set_body(article.get_text())

    # related links
    claim.set_refered_links(
        [link["href"] for link in article.find_all("a", href=True)])

    claim.set_claim(claim.title)

    # rating: the text after the last ":" of the first "Verdict" match
    # (presumably find_by_text returns a list of matching <span> elements —
    # verify against its definition).
    rating = find_by_text(parsed_claim_review_page, "Verdict", "span")
    if rating:
        claim.set_alternate_name(rating[0].text.split(":")[-1].strip())

    # tags: skip meta elements that carry no content attribute
    tags = [
        tag["content"]
        for tag in parsed_claim_review_page.find_all(
            "meta", {"property": "article:tag"})
        if tag.has_attr("content")
    ]
    claim.set_tags(", ".join(tags))

    # A claim without a verdict is not reported.
    if not claim.alternate_name:
        return []
    return [claim]