def fetch_html_document_form_external_url(self):
    """
    Replace the stored document with the content found at ``self.source_url``.

    The response is obtained through
    ``DownloadExternalResource.get_response_from_cache_or_raise_error``;
    this method does not call ``send_request`` itself.  Once the data is
    available, all related keywords are deleted, the data is stored in
    ``self.original_doc`` and the instance is saved.

    Raises:
        Whatever ``get_response_from_cache_or_raise_error`` raises.  In
        that case neither the keywords nor ``original_doc`` are modified.
    """
    fetcher = DownloadExternalResource(url=self.source_url)

    # Ideally, here we should get response from cache.
    # Otherwise the request will be initiated again.
    #
    # The data is obtained *before* anything is deleted: the helper may
    # raise (as its name indicates), and a failed fetch must not leave the
    # instance with its old document but no keywords.
    data = fetcher.get_response_from_cache_or_raise_error()

    # The keywords belong to the previous document, so drop them now that
    # the replacement content is in hand.
    self.keywords.all().delete()
    self.original_doc = data
    self.save()
def clean(self, *args, **kwargs):
    """
    Performs 'clean' on models.URLField field.

    Also fetches the data from the URL and verifies its content-type and
    size values.  A new request is sent on every call of this method, so
    the data is re-fetched each time.

    Args:
        *args, **kwargs: passed through unchanged to the parent ``clean``.

    Returns:
        str. If URLField validation is passed, it returns URL.

    Raises:
        forms.ValidationError: if the fetcher reports an error, an
            oversized file, or a content type that is not allowed.
    """
    # if URLField validation has passed, it should return URL
    url = super(ContentTypeRestrictedURLField, self).clean(*args, **kwargs)

    fetcher = DownloadExternalResource(
        url=url,
        max_file_size=self.max_upload_size,
        allowed_content_types=self.content_types)
    fetcher.send_request()

    error = fetcher.validate_response()
    if not error:
        return url

    # first check if we have any exceptions raised
    ex = error.get('error')
    if ex:
        raise forms.ValidationError(ex)

    # now check if the file that is being fetched
    # has correct content-type and size
    csize = error.get('content_size')
    if csize:
        raise forms.ValidationError(
            _('Please keep filesize under %s. Current filesize %s') % (
                filesizeformat(self.max_upload_size),
                filesizeformat(csize)))

    ctype = error.get('content_type')
    if ctype:
        # Interpolate AFTER the translation lookup.  Formatting inside
        # _() would look up the already-filled-in string, which can never
        # match a catalogue entry, so the message would stay untranslated.
        raise forms.ValidationError(
            _('Filetype "%s" not supported. '
              'Must be one of the following: %s') % (
                ctype, ', '.join(self.content_types)))

    return url
def clean(self, *args, **kwargs):
    """
    Performs 'clean' on models.URLField field.

    Also fetches the data from the URL and verifies its content-type and
    size values.  A new request is sent on every call of this method, so
    the data is re-fetched each time.

    Args:
        *args, **kwargs: passed through unchanged to the parent ``clean``.

    Returns:
        str. If URLField validation is passed, it returns URL.

    Raises:
        forms.ValidationError: if the fetcher reports an error, an
            oversized file, or a content type that is not allowed.
    """
    # if URLField validation has passed, it should return URL
    url = super(ContentTypeRestrictedURLField, self).clean(*args, **kwargs)

    fetcher = DownloadExternalResource(url=url,
                                       max_file_size=self.max_upload_size,
                                       allowed_content_types=self.content_types)
    fetcher.send_request()

    error = fetcher.validate_response()
    if not error:
        return url

    # first check if we have any exceptions raised
    ex = error.get('error')
    if ex:
        raise forms.ValidationError(ex)

    # now check if the file that is being fetched
    # has correct content-type and size
    csize = error.get('content_size')
    if csize:
        raise forms.ValidationError(
            _('Please keep filesize under %s. Current filesize %s')
            % (filesizeformat(self.max_upload_size), filesizeformat(csize)))

    ctype = error.get('content_type')
    if ctype:
        # The msgid must reach _() un-interpolated so the catalogue lookup
        # can succeed; the placeholders are filled in afterwards, the same
        # way the file-size message above does it.
        raise forms.ValidationError(
            _('Filetype "%s" not supported. Must be one of the following: %s')
            % (ctype, ', '.join(self.content_types)))

    return url