def test_parse_document_raises_if_uri_not_a_string(es_annotation_doc):
    """parse_document() should raise when the target source is not a string."""
    first_target = es_annotation_doc["_source"]["target"][0]
    first_target["source"] = 52  # An integer rather than a string.

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(es_annotation_doc)

    assert exc_info.value.reason == "uri_not_a_string"
def test_parse_document_raises_if_no_uri():
    """parse_document() should raise when the document carries no URI."""
    document_without_uri = {
        "_id": "annotation_id",
        "_source": {},  # No "uri".
    }

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(document_without_uri)

    assert exc_info.value.reason == "annotation_has_no_uri"
def test_parse_document_raises_if_no_uri(es_annotation_doc):
    """parse_document() should raise when the target has no "source" key."""
    first_target = es_annotation_doc["_source"]["target"][0]
    # Remove the URI from the otherwise valid fixture document.
    first_target.pop("source")

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(es_annotation_doc)

    assert exc_info.value.reason == "annotation_has_no_uri"
def test_parse_document_raises_if_uri_not_a_string():
    """parse_document() should raise when the target source is not a string."""
    document = {
        "_id": "annotation_id",
        # "uri" isn't a string.
        "_source": {"target": [{"source": 52}]},
    }

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(document)

    assert exc_info.value.reason == "uri_not_a_string"
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs():
    """parse_document() should raise when a PDF's web_uri is not a string."""
    pdf_source = {
        "target": [{"source": "urn:x-pdf:the-fingerprint"}],
        "document": {"web_uri": 52},  # An integer rather than a string.
    }
    document = {"_id": "annotation_id", "_source": pdf_source}

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(document)

    assert exc_info.value.reason == "uri_not_a_string"
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs(
    es_annotation_doc,
):
    """parse_document() should raise when a PDF's web_uri is not a string."""
    source = es_annotation_doc["_source"]
    source["target"][0]["source"] = "urn:x-pdf:the-fingerprint"
    source["document"] = {"web_uri": 52}  # An integer rather than a string.

    with pytest.raises(util.InvalidAnnotationError) as exc_info:
        util.parse_document(es_annotation_doc)

    assert exc_info.value.reason == "uri_not_a_string"
def test_parse_document_raises_if_annotated_deleted():
    """parse_document() should raise DeletedAnnotationError for a deleted doc."""
    # When an annotation is deleted in h it isn't immediately removed from the
    # search index. Its Elasticsearch document is temporarily updated to just
    # {'deleted': True}.
    deleted_document = {
        "_id": "annotation_id",
        "_source": {
            "deleted": True,
        },
    }

    # Only the exception type matters here, so the raised exception is not
    # bound to a name.
    with pytest.raises(util.DeletedAnnotationError):
        util.parse_document(deleted_document)
def test_parse_document_raises_if_uri_not_a_string():
    """parse_document() should raise when the target source is not a string."""
    non_string_uri = 52  # "uri" isn't a string.
    document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": non_string_uri}]},
    }

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(document)

    assert raised.value.reason == "uri_not_a_string"
def test_parse_document_raises_when_uri_from_web_uri_not_string_for_pdfs():
    """parse_document() should raise when a PDF's web_uri is not a string."""
    targets = [{"source": "urn:x-pdf:the-fingerprint"}]
    document_info = {"web_uri": 52}  # An integer rather than a string.
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": targets, "document": document_info},
    }

    with pytest.raises(util.InvalidAnnotationError) as raised:
        util.parse_document(es_document)

    assert raised.value.reason == "uri_not_a_string"
def test_parse_document_returns_annotation_id():
    """The first item returned by parse_document() should be the "_id"."""
    document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": "http://example.com/example.html"}]},
    }

    parsed = util.parse_document(document)

    assert parsed[0] == "annotation_id"
def test_parse_document_returns_document_uri_from_web_uri_when_pdf(
    es_annotation_doc,
):
    """For a urn:x-pdf source the document's web_uri should be returned."""
    source = es_annotation_doc["_source"]
    source["target"][0]["source"] = "urn:x-pdf:the-fingerprint"
    source["document"] = {"web_uri": "http://example.com/foo.pdf"}

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["document_uri"] == "http://example.com/foo.pdf"
def test_parse_document_returns_boilerplate_quote_when_no_quote(
    es_annotation_doc, selector
):
    """Without a usable quote, parse_document() should return boilerplate.

    NOTE(review): ``selector`` is presumably supplied by a parametrize
    decorator or fixture outside this block -- confirm.
    """
    # A falsy selector leaves the fixture document's target untouched.
    if selector:
        first_target = es_annotation_doc["_source"]["target"][0]
        first_target["selector"] = selector

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["quote"] == "Hypothesis annotation for example.com"
def test_parse_document_returns_document_uri():
    """The second item returned by parse_document() should be the URI."""
    document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": "http://example.com/example.html"}]},
    }

    parsed = util.parse_document(document)

    assert parsed[1] == "http://example.com/example.html"
def test_parse_document_returns_quote(es_annotation_doc):
    """The "exact" text of a TextQuoteSelector should come back as the quote."""
    text_quote_selector = {
        "type": "TextQuoteSelector",
        "exact": "test_quote",
    }
    first_target = es_annotation_doc["_source"]["target"][0]
    first_target["selector"] = [text_quote_selector]

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["quote"] == "test_quote"
def annotation(self):
    """Fetch an annotation and build the template data for its page.

    The annotation id is read from ``self.request.matchdict["id"]`` and the
    document is fetched from the Elasticsearch index named by the
    ``elasticsearch_index`` setting. A statsd counter is incremented for each
    outcome.

    Returns:
        A dict with two keys: ``data``, a JSON string containing
        ``chromeExtensionId``, ``viaUrl`` and ``extensionUrl``; and
        ``pretty_url``, the document URI's network location cut to at most
        ``NETLOC_MAX_LENGTH`` characters, with an ellipsis appended when it
        was longer.

    Raises:
        httpexceptions.HTTPNotFound: Elasticsearch has no such annotation.
        httpexceptions.HTTPUnprocessableEntity: the document could not be
            parsed, or its URI is not an http:// or https:// URL.
    """
    settings = self.request.registry.settings

    try:
        document = util.elasticsearch_client(settings).get(
            index=settings["elasticsearch_index"],
            doc_type="annotation",
            id=self.request.matchdict["id"])
    except exceptions.NotFoundError:
        statsd.incr("views.annotation.404.annotation_not_found")
        raise httpexceptions.HTTPNotFound(_("Annotation not found"))

    try:
        annotation_id, document_uri = util.parse_document(document)
    except util.InvalidAnnotationError as exc:
        statsd.incr("views.annotation.422.{}".format(exc.reason))
        raise httpexceptions.HTTPUnprocessableEntity(str(exc))

    # Remove any existing #fragment identifier from the URI before we
    # append our own.
    document_uri = parse.urldefrag(document_uri)[0]

    # str.startswith() accepts a tuple of prefixes, so one call covers both
    # schemes.
    if not document_uri.startswith(("http://", "https://")):
        statsd.incr("views.annotation.422.not_an_http_or_https_document")
        raise httpexceptions.HTTPUnprocessableEntity(
            _("Sorry, but it looks like this annotation was made on a "
              "document that is not publicly available."))

    via_url = "{via_base_url}/{uri}#annotations:{id}".format(
        via_base_url=settings["via_base_url"],
        uri=document_uri,
        id=annotation_id)

    extension_url = "{uri}#annotations:{id}".format(
        uri=document_uri, id=annotation_id)

    # Show only the (possibly truncated) network location to the user.
    parsed_url = parse.urlparse(document_uri)
    pretty_url = parsed_url.netloc[:NETLOC_MAX_LENGTH]
    if len(parsed_url.netloc) > NETLOC_MAX_LENGTH:
        pretty_url = pretty_url + jinja2.Markup("…")

    statsd.incr("views.annotation.200.annotation_found")

    return {
        "data": json.dumps({
            # Warning: variable names change from python_style to
            # javaScriptStyle here!
            "chromeExtensionId": settings["chrome_extension_id"],
            "viaUrl": via_url,
            "extensionUrl": extension_url,
        }),
        "pretty_url": pretty_url
    }
def test_parse_document_returns_document_uri():
    """The second item returned by parse_document() should be the URI."""
    expected_uri = "http://example.com/example.html"
    es_document = {
        "_id": "annotation_id",
        "_source": {"target": [{"source": "http://example.com/example.html"}]},
    }

    result = util.parse_document(es_document)

    assert result[1] == expected_uri
def test_parse_document_returns_annotation_id():
    """The first item returned by parse_document() should be the "_id"."""
    targets = [{"source": "http://example.com/example.html"}]
    es_document = {"_id": "annotation_id", "_source": {"target": targets}}

    result = util.parse_document(es_document)

    assert result[0] == "annotation_id"
def test_parse_document_returns_document_uri_from_web_uri_when_pdf():
    """For a urn:x-pdf source the document's web_uri should be returned."""
    pdf_source = {
        "target": [{"source": "urn:x-pdf:the-fingerprint"}],
        "document": {"web_uri": "http://example.com/foo.pdf"},
    }
    es_document = {"_id": "annotation_id", "_source": pdf_source}

    parsed = util.parse_document(es_document)

    # The URI is the second item of the returned sequence.
    assert parsed[1] == "http://example.com/foo.pdf"
def annotation(self):
    """Fetch an annotation and build the template data for its page.

    The annotation id comes from ``self.request.matchdict["id"]``; the
    document is read from the Elasticsearch index named by the
    ``elasticsearch_index`` setting. Each outcome increments a statsd counter.

    Returns:
        A dict holding ``data`` (a JSON string with ``chromeExtensionId``,
        ``viaUrl`` and ``extensionUrl``) and ``pretty_url`` (the document
        URI's network location, limited to ``NETLOC_MAX_LENGTH`` characters
        and followed by an ellipsis when it had to be shortened).

    Raises:
        httpexceptions.HTTPNotFound: Elasticsearch has no such annotation.
        httpexceptions.HTTPUnprocessableEntity: the document could not be
            parsed, or its URI is not an http:// or https:// URL.
    """
    settings = self.request.registry.settings

    try:
        document = util.elasticsearch_client(settings).get(
            index=settings["elasticsearch_index"],
            doc_type="annotation",
            id=self.request.matchdict["id"])
    except exceptions.NotFoundError:
        statsd.incr("views.annotation.404.annotation_not_found")
        raise httpexceptions.HTTPNotFound(_("Annotation not found"))

    try:
        annotation_id, document_uri = util.parse_document(document)
    except util.InvalidAnnotationError as exc:
        statsd.incr("views.annotation.422.{}".format(exc.reason))
        raise httpexceptions.HTTPUnprocessableEntity(str(exc))

    # Remove any existing #fragment identifier from the URI before we
    # append our own.
    document_uri = parse.urldefrag(document_uri)[0]

    # One startswith() call with a tuple of prefixes replaces the chained
    # ``or`` of two separate calls.
    if not document_uri.startswith(("http://", "https://")):
        statsd.incr("views.annotation.422.not_an_http_or_https_document")
        raise httpexceptions.HTTPUnprocessableEntity(
            _("Sorry, but it looks like this annotation was made on a "
              "document that is not publicly available."))

    via_url = "{via_base_url}/{uri}#annotations:{id}".format(
        via_base_url=settings["via_base_url"],
        uri=document_uri,
        id=annotation_id)

    extension_url = "{uri}#annotations:{id}".format(
        uri=document_uri, id=annotation_id)

    # Only the (possibly truncated) network location is shown to the user.
    parsed_url = parse.urlparse(document_uri)
    pretty_url = parsed_url.netloc[:NETLOC_MAX_LENGTH]
    if len(parsed_url.netloc) > NETLOC_MAX_LENGTH:
        pretty_url = pretty_url + jinja2.Markup("…")

    statsd.incr("views.annotation.200.annotation_found")

    return {
        "data": json.dumps({
            # Warning: variable names change from python_style to
            # javaScriptStyle here!
            "chromeExtensionId": settings["chrome_extension_id"],
            "viaUrl": via_url,
            "extensionUrl": extension_url,
        }),
        "pretty_url": pretty_url
    }
def test_parse_document_returns_document_uri_from_web_uri_when_pdf():
    """For a urn:x-pdf source the document's web_uri should be returned."""
    web_uri = "http://example.com/foo.pdf"
    es_document = {
        "_id": "annotation_id",
        "_source": {
            "target": [{"source": "urn:x-pdf:the-fingerprint"}],
            "document": {"web_uri": "http://example.com/foo.pdf"},
        },
    }

    result = util.parse_document(es_document)

    # The URI is the second item of the returned sequence.
    assert result[1] == web_uri
def test_parse_document_returns_text(es_annotation_doc):
    """The document's "text" field should be returned under "text"."""
    es_annotation_doc["_source"].update({"text": "test_text"})

    parsed = util.parse_document(es_annotation_doc)

    assert parsed["text"] == "test_text"
def test_parse_document_returns_authority(es_annotation_doc):
    """The fixture document should parse to the "hypothes.is" authority."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["authority"] == "hypothes.is"
def test_parse_document_returns_annotation_id(es_annotation_doc):
    """The parsed result should expose the id under "annotation_id"."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["annotation_id"] == "annotation_id"
def test_parse_document_returns_document_uri(es_annotation_doc):
    """The parsed result should expose the URI under "document_uri"."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["document_uri"] == "http://example.com/example.html"
def annotation(self):
    """Fetch an annotation and build the template data for its page.

    The annotation named by ``matchdict["id"]`` is read through
    ``self.request.es`` and handed to ``util.parse_document``.

    Returns:
        A dict with ``data`` (a JSON string holding ``chromeExtensionId``,
        ``viaUrl`` and ``extensionUrl``), ``show_metadata``, ``pretty_url``,
        ``quote``, ``text`` and ``title``.

    Raises:
        httpexceptions.HTTPNotFound: the annotation is missing from
            Elasticsearch or has been deleted.
        httpexceptions.HTTPUnprocessableEntity: the document is invalid, or
            its URI fails the ``_is_valid_http_url`` check.
    """
    request = self.request
    settings = request.registry.settings

    try:
        es_document = request.es.get(
            index=settings["elasticsearch_index"],
            doc_type="annotation",
            id=request.matchdict["id"],
        )
    except exceptions.NotFoundError:
        raise httpexceptions.HTTPNotFound(_("Annotation not found"))

    try:
        parsed = util.parse_document(es_document)
        authority = parsed["authority"]
        annotation_id = parsed["annotation_id"]
        raw_document_uri = parsed["document_uri"]
        show_metadata = parsed["show_metadata"]
        quote = parsed["quote"]
        text = parsed["text"]
    except util.DeletedAnnotationError:
        raise httpexceptions.HTTPNotFound(_("Annotation not found"))
    except util.InvalidAnnotationError as exc:
        raise httpexceptions.HTTPUnprocessableEntity(str(exc))

    # Remove any existing #fragment identifier from the URI before we
    # append our own.
    document_uri, _fragment = parse.urldefrag(raw_document_uri)[:2]

    if not _is_valid_http_url(document_uri):
        raise httpexceptions.HTTPUnprocessableEntity(
            _(
                "Sorry, but it looks like this annotation was made on a "
                "document that is not publicly available."
            )
        )

    # A proxied URL is offered only when the proxy may be used for this
    # authority and the page does not already embed the client.
    proxy_allowed = _can_use_proxy(settings, authority=authority)
    if proxy_allowed and not url_embeds_client(document_uri):
        via_url = "{via_base_url}/{uri}#annotations:{id}".format(
            via_base_url=settings["via_base_url"],
            uri=document_uri,
            id=annotation_id,
        )
    else:
        via_url = None

    extension_url = "{uri}#annotations:{id}".format(
        uri=document_uri, id=annotation_id
    )

    pretty_url = util.get_pretty_url(document_uri)
    title = util.get_boilerplate_quote(document_uri)

    # Warning: variable names change from python_style to
    # javaScriptStyle here!
    client_data = {
        "chromeExtensionId": settings["chrome_extension_id"],
        "viaUrl": via_url,
        "extensionUrl": extension_url,
    }

    return {
        "data": json.dumps(client_data),
        "show_metadata": show_metadata,
        "pretty_url": pretty_url,
        "quote": quote,
        "text": text,
        "title": title,
    }
def test_parse_document_returns_boilerplate_when_no_text(es_annotation_doc):
    """With the unmodified fixture, "text" should be the boilerplate text."""
    parsed = util.parse_document(es_annotation_doc)

    assert parsed["text"] == util.ANNOTATION_BOILERPLATE_TEXT
def test_parse_document_returns_show_metadata_true_when_shared_and_world(
    es_annotation_doc,
):
    """With the unmodified fixture, "show_metadata" should be exactly True."""
    parsed = util.parse_document(es_annotation_doc)

    # Identity check: a merely truthy value is not enough.
    assert parsed["show_metadata"] is True