async def test_parser_should_find_grammar_info_in_german_verbs(
    examples_downloader: FileCache,
):
    """The first lemma for the German verb "bringen" carries its grammar info."""
    search_url = get_search_url(
        query="bringen", src="de", dst="en", guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    result = XExtractParser().parse_search_result_to_page(document)
    assert result.lemmas[0].grammar_info == "Akk"
async def test_parse_to_dict_should_return_parseable_result(
    examples_downloader: FileCache,
    query: str,
    src: LANGUAGE_CODE,
    dst: LANGUAGE_CODE,
):
    """Any downloaded search result page can be parsed without raising."""
    search_url = get_search_url(
        query=query, src=src, dst=dst, guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    XExtractParser().parse_search_result_to_page(document)
async def test_parser_should_process_examples_without_links(
    examples_downloader: FileCache,
):
    """External sources parsed for "einfach" always expose absolute URLs."""
    search_url = get_search_url(
        query="einfach", src="de", dst="en", guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    result = XExtractParser().parse_search_result_to_page(document)
    for source in result.external_sources:
        # Both sides of each external source must be proper http(s) links.
        assert source.src_url.startswith("http")
        assert source.dst_url.startswith("http")
async def test_parse_autocompletions_should_return_autocompletions(
    examples_downloader: FileCache,
):
    """The autocompletion page for "katz" yields "Katze" as the first item."""
    completions_url = get_autocompletions_url(query="katz", src="de", dst="en")
    document = await examples_downloader.download(completions_url)
    parse_result = XExtractParser().parse_autocompletions(document)
    expected_translations = [
        Autocompletions.AutocompletionItem.TranslationItem(text="cat", pos="n"),
        Autocompletions.AutocompletionItem.TranslationItem(text="feline", pos="n"),
        Autocompletions.AutocompletionItem.TranslationItem(text="crab", pos="n"),
    ]
    expected_first_item = Autocompletions.AutocompletionItem(
        text="Katze",
        pos="f",
        translations=expected_translations,
    )
    assert parse_result.autocompletions[0] == expected_first_item
async def test_parser_should_find_correction(
    examples_downloader: FileCache,
    query: str,
    src: LANGUAGE_CODE,
    dst: LANGUAGE_CODE,
    correction: Optional[str],
):
    """The parser finds the expected spelling correction (or None) on the page."""
    search_url = get_search_url(
        query=query, src=src, dst=dst, guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    found_correction = XExtractParser().find_correction(document)
    assert found_correction == correction
async def test_parser_should_detect_not_found(
    examples_downloader: FileCache,
    query: str,
    src: LANGUAGE_CODE,
    dst: LANGUAGE_CODE,
    is_not_found: bool,
):
    """The parser correctly classifies pages as "not found" or not."""
    search_url = get_search_url(
        query=query, src=src, dst=dst, guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    detected = XExtractParser().is_not_found(document)
    assert detected == is_not_found
async def test_parser_should_find_translation_examples(
    examples_downloader: FileCache,
):
    """The first translation of "obrigado" carries exactly one usage example."""
    search_url = get_search_url(
        query="obrigado", src="pt", dst="en", guess_direction=False
    )
    document = await examples_downloader.download(search_url)
    result = XExtractParser().parse_search_result_to_page(document)
    examples = result.lemmas[0].translations[0].examples
    assert examples is not None
    assert len(examples) == 1
    first_example = examples[0]
    assert first_example.src == (
        "Obrigado por sua participação em nossa pesquisa."
    )
    assert first_example.dst == (
        "Thank you for your participation in our survey."
    )
def linguee_client(examples_downloader) -> LingueeClient:
    """Build a LingueeClient wired to the cached downloader and the parser."""
    parser = XExtractParser()
    return LingueeClient(
        page_downloader=examples_downloader,
        page_parser=parser,
    )
from linguee_api.models import Autocompletions, ParseError, SearchResult from linguee_api.parsers import XExtractParser sentry_sdk.init(dsn=settings.sentry_dsn, environment=settings.sentry_environment) app = FastAPI( title="Linguee API", description=PROJECT_DESCRIPTION, version="2.0.0", ) app.add_middleware(SentryAsgiMiddleware) page_downloader = MemoryCache(upstream=FileCache( cache_directory=settings.cache_directory, upstream=HTTPXDownloader())) client = LingueeClient(page_downloader=page_downloader, page_parser=XExtractParser()) @app.get("/", include_in_schema=False) def index(): return RedirectResponse("/docs") @app.get( "/api/v2/translations", status_code=status.HTTP_200_OK, responses={ status.HTTP_200_OK: { "model": list[SearchResult.Lemma] }, status.HTTP_500_INTERNAL_SERVER_ERROR: {