def __init__(self, number: int):
    """Load or create the certificate for ``number`` and refresh it.

    Looks up an ``ApprovalCertificate`` whose ``number`` matches; when the
    query yields nothing, a fresh document with that number is built
    instead. The consultaca.com page for the number is then fetched,
    handed to ``parse_website`` and the document is saved.

    Args:
        number: Certificate number, used both as the query key and as the
            last path segment of the URL.
    """
    stored = ApprovalCertificate.objects(number=number).first()
    self.approval_certificate = (
        ApprovalCertificate(number=number) if stored is None else stored
    )

    # NOTE(review): the page is fetched and parsed whether or not a stored
    # document was found -- confirm this matches the intended flow.
    page = self.fetch_website(f"https://consultaca.com/{number}")
    self.parse_website(page)
    self.approval_certificate.save()
class ApprovalCertificateExtractor(BaseExtractor):
    """Fill an ``ApprovalCertificate`` document from its consultaca.com page."""

    def __init__(self, number: int):
        """Load or create the certificate for ``number`` and refresh it.

        Args:
            number: Certificate number, used both as the query key and as
                the last path segment of the fetched URL.
        """
        stored = ApprovalCertificate.objects(number=number).first()
        self.approval_certificate = (
            ApprovalCertificate(number=number) if stored is None else stored
        )

        # NOTE(review): the page is fetched and parsed whether or not a
        # stored document was found -- confirm this is the intended flow.
        page = self.fetch_website(f"https://consultaca.com/{number}")
        self.parse_website(page)
        self.approval_certificate.save()

    def parse_website(self, soup: BeautifulSoup):
        """Copy class, validity and manufacturer from the page to the document.

        The lower-cased page title is run through ``nlp`` and the entity
        labels found are passed to ``_find_first`` together with the
        candidates ``PFF3``, ``PFF2``, ``PFF1``. When that yields ``None``
        the method returns without touching the document. Otherwise
        ``ac_class``, ``valid`` and ``manufacturer`` are set and, once all
        of them were read successfully, ``good_ac`` is set to ``True``.

        Args:
            soup: Parsed page of the certificate.

        Raises:
            ErrorParsingWebsite: If the title cannot be read, or the
                validity / manufacturer fields are not where expected.
        """
        try:
            page_title = soup.title.get_text().lower()
        except (IndexError, AttributeError) as exception:
            raise ErrorParsingWebsite from exception

        # Map each entity label to its text (a later entity with the same
        # label overwrites an earlier one).
        labels_found = {}
        for entity in nlp(page_title).ents:
            labels_found[entity.label_] = entity.text

        matched_class = _find_first(["PFF3", "PFF2", "PFF1"], labels_found)
        if matched_class is None:
            return
        self.approval_certificate.ac_class = matched_class

        try:
            # Validity is the <span> text of the eighth <p> in div#box_result.
            result_box = soup.find("div", {"id": "box_result"})
            status_text = result_box.find_all("p")[7].span.get_text().lower()
            self.approval_certificate.valid = status_text == "válido"

            # Manufacturer is the first word of the third " - " title segment.
            title_segments = page_title.split(" - ")
            first_word = title_segments[2].split(" ")[0]
            self.approval_certificate.manufacturer = first_word.lower()
        except (IndexError, AttributeError) as exception:
            raise ErrorParsingWebsite from exception

        self.approval_certificate.good_ac = True
def mocked_extract_approval_certificate(self: RespiratorExtractor,
                                        ac_real: int,
                                        ac_candidate: str,
                                        _: str = None):
    """Stand-in extractor that accepts a candidate only if it equals ``ac_real``.

    Dots are removed from ``ac_candidate`` before it is converted to an
    integer. On a match, ``self.respirator.approval_certificate`` is set to
    a new ``ApprovalCertificate`` built with that number and
    ``good_ac=True``.

    Args:
        ac_real: The number the candidate must equal.
        ac_candidate: Textual candidate, possibly containing ``.``.
        _: Ignored.

    Returns:
        ``True`` when the candidate matched and the certificate was
        attached, ``False`` otherwise (including when the candidate is not
        a string or is not numeric).
    """
    try:
        digits = ac_candidate.replace(".", "")
        number = int(digits)
    except (ValueError, AttributeError):
        # Non-numeric text, or a candidate without ``replace`` (e.g. None).
        return False

    if ac_real != number:
        return False

    self.respirator.approval_certificate = ApprovalCertificate(
        number=number,
        good_ac=True,
    )
    return True
def setup_class(cls):
    """Drop the ``ApprovalCertificate`` and ``Respirator`` collections."""
    # Same order as before: certificates first, then respirators.
    for document_class in (ApprovalCertificate, Respirator):
        document_class.drop_collection()
def invalid_approval_certificate(self):
    """Save an ``ApprovalCertificate`` numbered 777 with ``good_ac=False``.

    Returns:
        Whatever ``save()`` returns for the new document.
    """
    return ApprovalCertificate(
        number=777,
        manufacturer="test",
        good_ac=False,
    ).save()
def approval_certificate(self):
    """Save an ``ApprovalCertificate`` numbered 666 with ``good_ac=True``.

    Returns:
        Whatever ``save()`` returns for the new document.
    """
    return ApprovalCertificate(
        number=666,
        manufacturer="test",
        good_ac=True,
    ).save()