Пример #1
0
def recall_document(title, url, uuid, text):
    """Look up an existing SearchDocument by url, uuid, or exact text.

    The identifiers are tried in priority order -- url first, then uuid,
    then an exact text match -- and the first hit wins.  As a side effect,
    if ``title`` is supplied and the stored document has no title yet, the
    stored document's title is backfilled and saved.

    Raises SearchDocument.DoesNotExist when no document matches any of the
    supplied identifiers.
    """
    doc = None

    if url:
        try:
            doc = SearchDocument.objects.lookup_by_url(url)
        except SearchDocument.DoesNotExist:
            pass

    if uuid and not doc:
        try:
            doc = SearchDocument.objects.get(uuid=uuid)
        except SearchDocument.DoesNotExist:
            pass

    if text and not doc:
        # .first() fetches at most one row and yields None on no match,
        # instead of materializing the whole queryset only to index [0].
        doc = SearchDocument.objects.filter(text=text).first()

    if not doc:
        raise SearchDocument.DoesNotExist()

    # If the extractor fails to extract a title, this will copy a title from
    # a later submission from an improved extractor.
    if title and not doc.title:
        doc.title = title
        doc.save()

    return doc
Пример #2
0
def fetch_and_store(url):
    """Download the page at *url*, clean it, and persist a SearchDocument.

    Any failure while fetching or cleaning is surfaced to the caller as an
    Http404 for the requested url.
    """
    try:
        title, text = fetch_and_clean(url)
    except Exception:
        # Every fetch/parse failure is reported as "document not found".
        raise Http404(url)

    doc = SearchDocument(url=url, title=title, text=text)
    doc.save()
    return doc
Пример #3
0
def fetch_and_store(url):
    """Fetch ``url``, extract its title and text, save and return the doc.

    Raises Http404(url) if fetching or cleaning fails for any reason.
    """
    try:
        extracted = fetch_and_clean(url)
    except Exception:
        raise Http404(url)

    page_title, page_text = extracted

    document = SearchDocument()
    document.url = url
    document.title = page_title
    document.text = page_text
    document.save()
    return document
Пример #4
0
                url = doc.url
            text = doc.text
        except UnicodeDecodeError:
            raise

        except SearchDocument.DoesNotExist:
            pass

        except Exception, e:
            return HttpResponseServerError(str(e))

    if not doc:
        if not text:
            return HttpResponseNotFound(str(url or uuid))
        else:
            doc = SearchDocument()
            doc.text = text
            if title:
                doc.title = title
            if url:
                doc.url = url
            ua_string = request.META.get('HTTP_USER_AGENT')
            if ua_string is not None:
                doc.user_agent = ua_string[:255]
            doc.save()


    # The actual proxying:
    response = execute_search(doc, doctype)
    if isinstance(response, HttpResponse):
        return response
Пример #5
0
                url = doc.url
            text = doc.text
        except UnicodeDecodeError:
            raise

        except SearchDocument.DoesNotExist:
            pass

        except Exception, e:
            return HttpResponseServerError(str(e))

    if not doc:
        if not text:
            return HttpResponseNotFound(str(url or uuid))
        else:
            doc = SearchDocument()
            doc.text = text
            if title:
                doc.title = title
            if url:
                doc.url = url
            ua_string = request.META.get('HTTP_USER_AGENT')
            if ua_string is not None:
                doc.user_agent = ua_string[:255]
            doc.save()

    # The actual proxying:
    response = execute_search(doc, doctype)
    if isinstance(response, HttpResponse):
        return response