def filter_ads(self, url):
    '''Tell whether `url` passes the blacklist filter.

    `filter` is looked up at module level and must expose `match(url)`;
    the builtin of the same name has no such method, so it is defined
    elsewhere in this file.

    Returns True when the filter does not match `url`. When it does match,
    records the rejection on the instance (`msg`, `status` set to False,
    `code` set to 808) and returns False.
    '''
    # Guard clause: nothing to record for a URL the filter does not match.
    if not filter.match(url):
        return True

    self.msg = 'Blacklisted url'
    self.status = False
    self.code = 808
    return False
def valid_url(self):
    '''Check the format and validity of `self.url`.

    The mapping returned by `self.parse_link(self.url)` is first copied
    onto the instance as attributes (values that are None or "" are stored
    as ""). The URL is then run through a series of checks, in order:

    * scheme not in ACCEPTED_PROTOCOL  -> code 804
    * filetype in BAD_TYPES            -> code 806
    * domain in BAD_DOMAINS            -> code 807
    * subdomain in BAD_DOMAINS         -> code 807
    * path in BAD_PATHS                -> code 807
    * module-level `filter` matches    -> code 808

    Each of the first five checks is best-effort: an exception raised
    while evaluating it is logged as a warning and the check is skipped.
    The final `filter.match` call is not guarded.

    Returns:
        `self.status` (just set to False) as soon as a check rejects the
        URL, with `self.msg` and `self.code` describing the reason;
        otherwise `self`. Note the success value is the instance itself,
        not True; this is kept as-is for existing callers.
    '''

    def reject(msg, code):
        # Record why the URL was refused and hand back the failure status.
        self.msg = msg
        self.status = False
        self.code = code
        return self.status

    # Expose every parsed component as an attribute; empty/missing values
    # are normalised to "" so the checks below can read them uniformly.
    for key, value in self.parse_link(self.url).items():
        if key is None:
            continue
        if value is None or value == "":
            setattr(self, key, "")
        else:
            setattr(self, key, value)

    try:
        if self.scheme not in ACCEPTED_PROTOCOL:
            return reject('URL: Wrong protocol %s' % self.scheme, 804)
    except Exception as exc:
        logging.warning("%s", exc)

    try:
        if self.filetype in BAD_TYPES:
            return reject('URL: Invalid webpage type %s' % self.filetype, 806)
    except Exception as exc:
        # Previously swallowed silently; log it like the sibling checks do.
        logging.warning("%s", exc)

    try:
        if self.domain in BAD_DOMAINS:
            return reject('URL: Bad domain %s' % self.domain, 807)
    except Exception as exc:
        logging.warning("%s", exc)

    try:
        if self.subdomain in BAD_DOMAINS:
            return reject('URL: Bad subdomain %s' % self.subdomain, 807)
    except Exception as exc:
        logging.warning("%s", exc)

    try:
        if self.path in BAD_PATHS:
            return reject('URL: Bad path %s' % self.path, 807)
    except Exception as exc:
        logging.warning("%s", exc)

    # `filter` is a module-level object with a `match` method (it shadows
    # the builtin); this call is deliberately left outside any try block,
    # as in the original.
    if filter.match(self.url):
        return reject('URL: Blacklisted url', 808)

    return self