def recall_document(title, url, uuid, text):
    """Look up an existing SearchDocument by URL, then UUID, then exact text."""
    doc = None
    if url:
        try:
            doc = SearchDocument.objects.lookup_by_url(url)
        except SearchDocument.DoesNotExist:
            pass
    if uuid and not doc:
        try:
            doc = SearchDocument.objects.get(uuid=uuid)
        except SearchDocument.DoesNotExist:
            pass
    if text and not doc:
        matches = SearchDocument.objects.filter(text=text)
        if matches:
            doc = matches[0]
    if not doc:
        raise SearchDocument.DoesNotExist()
    # If the extractor fails to extract a title, this will copy a title from
    # a later submission from an improved extractor.
    if title and not doc.title:
        doc.title = title
        doc.save()
    return doc
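# A hypothetical usage sketch (the values below are invented for
# illustration): lookup prefers an exact URL match, then falls back to the
# UUID, then to an exact text match, and raises SearchDocument.DoesNotExist
# when all three miss.
#
#     doc = recall_document(
#         title='Example Page',
#         url='http://example.com/page',
#         uuid='0bd3f264-5f2e-4a30-9a9c-8ea0e4cd2f1e',
#         text=None,
#     )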
def fetch_and_store(url):
    """Fetch a URL, extract its title and text, and store a new SearchDocument."""
    try:
        (title, text) = fetch_and_clean(url)
    except Exception:
        # Any fetch or extraction failure is reported as a 404 for the URL.
        raise Http404(url)
    doc = SearchDocument()
    doc.url = url
    doc.title = title
    doc.text = text
    doc.save()
    return doc
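# `fetch_and_clean` is only called in this file, not defined here. A minimal
# sketch of what it might do follows, assuming a plain stdlib fetch and an
# HTMLParser-based extractor; the project's real extractor is presumably more
# sophisticated. The name `_fetch_and_clean_sketch` and everything in it are
# assumptions, not the actual implementation.

from html.parser import HTMLParser
from urllib.request import urlopen


class _TextExtractor(HTMLParser):
    """Collects the <title> contents and all other text nodes."""

    def __init__(self):
        super().__init__()
        self.title_parts = []
        self.text_parts = []
        self._in_title = False

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self._in_title = True

    def handle_endtag(self, tag):
        if tag == 'title':
            self._in_title = False

    def handle_data(self, data):
        if self._in_title:
            self.title_parts.append(data)
        else:
            self.text_parts.append(data)


def _fetch_and_clean_sketch(url):
    # Hypothetical stand-in for fetch_and_clean(): download the page and
    # return a (title, text) pair, with whitespace collapsed in the text.
    html = urlopen(url).read().decode('utf-8', errors='replace')
    parser = _TextExtractor()
    parser.feed(html)
    title = ''.join(parser.title_parts).strip()
    text = ' '.join(''.join(parser.text_parts).split())
    return (title, text)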
    # Fragment of the proxy view: `request`, `doctype`, and the submitted
    # title/url/uuid/text are bound earlier in the (omitted) part of the view.
    # The opening of this try block is missing from the fragment; recalling
    # the document is the likely original, since SearchDocument.DoesNotExist
    # is handled below.
    doc = None
    try:
        doc = recall_document(title, url, uuid, text)
        url = doc.url
        text = doc.text
    except UnicodeDecodeError:
        raise
    except SearchDocument.DoesNotExist:
        pass
    except Exception as e:
        return HttpResponseServerError(str(e))

    if not doc:
        if not text:
            return HttpResponseNotFound(str(url or uuid))
        else:
            doc = SearchDocument()
            doc.text = text
            if title:
                doc.title = title
            if url:
                doc.url = url
            ua_string = request.META.get('HTTP_USER_AGENT')
            if ua_string is not None:
                doc.user_agent = ua_string[:255]
            doc.save()

    # The actual proxying:
    response = execute_search(doc, doctype)
    if isinstance(response, HttpResponse):
        return response