Пример #1
0
def recipe_from_url(request):
    """Download the page at the POSTed ``url`` and pass it to ``get_from_html``.

    The target address is read from ``request.POST['url']``.

    Returns:
        A ``JsonResponse`` with ``error: True`` and HTTP status 400 when the
        download fails or the remote server answers with status 403;
        otherwise whatever ``get_from_html(response.text, url)`` returns.
    """
    url = request.POST['url']

    # Send a desktop-browser User-Agent with the request (presumably because
    # some sites refuse the default client identification -- not confirmed
    # from this file).
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'  # noqa: E501
    }
    # NOTE(review): ``url`` comes straight from the request and is fetched
    # server-side; consider restricting the allowed schemes/hosts.
    try:
        # Without a timeout ``requests`` may wait forever on a silent host.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.exceptions.RequestException:
        # RequestException is the base class of ConnectionError and also
        # covers timeouts and malformed URLs (missing schema, invalid URL),
        # which would otherwise escape as unhandled exceptions.
        return JsonResponse(
            {
                'error': True,
                'msg': _('The requested page could not be found.')
            },
            status=400)

    if response.status_code == 403:
        return JsonResponse(
            {
                'error':
                True,
                'msg':
                _('The requested page refused to provide any information (Status Code 403).'
                  )  # noqa: E501
            },
            status=400)
    return get_from_html(response.text, url)
Пример #2
0
    def test_ld_json(self):
        """Check the size of the parsed ``get_from_html`` output per fixture.

        Every stored HTML page is passed to ``get_from_html`` together with
        the placeholder URL ``'test_url'``. The ``content`` of the result is
        decoded as JSON and the length of its ``str()`` form is compared with
        the value recorded for that page.
        """
        resource_dir = 'cookbook/tests/resources/websites/'
        # (fixture file name, expected length of str(parsed JSON))
        expected_lengths = [
            ('ld_json_1.html', 3128),
            ('ld_json_2.html', 1450),
            ('ld_json_3.html', 1545),
            ('ld_json_4.html', 1657),
            ('ld_json_invalid.html', 115),
            ('ld_json_itemList.html', 3131),
            ('ld_json_multiple.html', 1546),
            ('micro_data_1.html', 1022),
            ('micro_data_2.html', 1384),
            ('micro_data_3.html', 1100),
            ('micro_data_4.html', 4231),
        ]

        for file_name, expected_length in expected_lengths:
            path = resource_dir + file_name
            # One sub-test per fixture so a failure names the offending file
            # and the remaining fixtures are still checked.
            with self.subTest(file=path):
                # Only the read needs the handle; ``with`` closes it.
                with open(path, 'rb') as html_file:
                    html = html_file.read()
                parsed_content = json.loads(
                    get_from_html(html, 'test_url').content)
                self.assertEqual(
                    len(str(parsed_content)), expected_length, msg=path)
Пример #3
0
    def test_ld_json(self):
        """Check the size of the parsed ``get_from_html`` output per fixture.

        Every stored HTML page is passed to ``get_from_html`` together with
        the placeholder URL ``'test_url'``. The ``content`` of the result is
        decoded as JSON and the length of its ``str()`` form is compared with
        the value recorded for that page.
        """
        resource_dir = 'cookbook/tests/resources/websites/'
        # (fixture file name, expected length of str(parsed JSON))
        expected_lengths = [
            ('ld_json_1.html', 3237),
            ('ld_json_2.html', 1510),
            ('ld_json_3.html', 1629),
            ('ld_json_4.html', 1744),
            ('ld_json_itemList.html', 3206),
            ('ld_json_multiple.html', 1621),
            ('micro_data_1.html', 1079),
            ('micro_data_2.html', 1438),
            ('micro_data_3.html', 1148),
            ('micro_data_4.html', 4396),
        ]

        for file_name, expected_length in expected_lengths:
            path = resource_dir + file_name
            # One sub-test per fixture so a failure names the offending file
            # and the remaining fixtures are still checked; this replaces
            # printing the file name before every check.
            with self.subTest(file=path, expected_length=expected_length):
                # Only the read needs the handle; ``with`` closes it.
                with open(path, 'rb') as html_file:
                    html = html_file.read()
                parsed_content = json.loads(
                    get_from_html(html, 'test_url').content)
                self.assertEqual(
                    len(str(parsed_content)), expected_length, msg=path)