def test_parse(context, mocker):
    """Run ``parse`` on example.org twice: with a ``store`` rule, then without.

    First pass: ``store`` holds a pattern for httpbin.org and ``meta`` holds
    XPaths for title and description; exactly one ``emit`` call is expected,
    made with ``rule="fetch"``.

    Second pass: after flushing the Redis connection and clearing ``store``,
    the payload must carry the expected url, title and description, and the
    cumulative ``emit`` count on the same mock must be three.
    """
    response = context.http.get("http://example.org/")
    payload = response.serialize()
    mocker.patch.object(context, "emit")

    context.params["store"] = {"pattern": "https://httpbin.org/*"}
    context.params["meta"] = dict(title=".//h1", description=".//p")
    parse(context, payload)
    assert context.emit.call_count == 1
    context.emit.assert_called_once_with(rule="fetch", data=ANY)

    # cleanup tags: flush everything on the connection before the second pass
    connect_redis().flushall()

    context.http.result = None
    context.params["store"] = None
    parse(context, payload)

    assert payload["url"] == "https://www.iana.org/domains/example"
    assert payload["title"] == "Example Domain"
    assert payload["description"].startswith("This domain is for")

    # The mock is never reset, so this count includes the first pass.
    assert context.emit.call_count == 3, payload
def test_parse_ftm(context, mocker):
    """Check that ``parse`` fills ``properties`` from XPaths for a schema.

    Fetches an OCCRP article, configures the ``Article`` schema with XPath
    expressions for title, author, publication date and description, and
    verifies the values that ``parse`` stores under ``data["properties"]``.
    """
    article_url = "https://www.occrp.org/en/daily/14082-riviera-maya-gang-members-sentenced-in-romania"
    payload = context.http.get(article_url).serialize()

    # One XPath expression per property to extract.
    xpaths = {
        "title": './/meta[@property="og:title"]/@content',
        "author": './/meta[@name="author"]/@content',
        "publishedAt": './/*[@class="date"]/text()',
        "description": './/meta[@property="og:description"]/@content',
    }
    context.params["schema"] = "Article"
    context.params["properties"] = xpaths

    parse(context, payload)

    extracted = payload["properties"]
    assert "Riviera Maya Gang Members Sentenced in Romania" in extracted["title"]
    assert "Attila Biro" in extracted["author"]
    # Only the first description value is checked.
    assert extracted["description"][0].startswith("A Bucharest court")
def test_parse(context, mocker):
    """Run ``parse`` on example.org with and without a ``store`` rule.

    First pass: with a ``store`` pattern for httpbin.org, exactly one
    ``emit`` call is expected, made with ``rule="fetch"`` and the IANA
    example URL as data.

    Second pass: after flushing the Redis connection and clearing ``store``,
    a freshly patched ``emit`` must have been called twice.
    """
    url = "http://example.org/"
    result = context.http.get(url)
    data = result.serialize()

    # pytest-mock does not support using the object returned by
    # ``mocker.patch.object`` as a context manager: the ``with`` would enter
    # the mock itself and would not scope the patch. Patch with a plain call;
    # the fixture undoes it at teardown.
    mocker.patch.object(context, "emit")
    rules = {"pattern": "https://httpbin.org/*"}
    context.params["store"] = rules
    parse(context, data)
    assert context.emit.call_count == 1
    context.emit.assert_called_once_with(
        rule="fetch", data={"url": "https://www.iana.org/domains/example"}
    )

    # cleanup tags
    conn = connect_redis()
    conn.flushall()

    # Patch again so ``emit`` is a fresh mock and the count below covers only
    # the second pass.
    mocker.patch.object(context, "emit")
    context.http.result = None
    context.params["store"] = None
    parse(context, data)
    assert context.emit.call_count == 2, data
def test_parse(context, mocker):
    """Verify ``parse`` emit counts and extracted metadata for example.org.

    With a ``store`` pattern for httpbin.org configured, the first ``parse``
    call must emit exactly once, with ``rule="fetch"``. After ``tags`` is
    deleted and ``store`` is cleared, a second ``parse`` call must leave the
    expected url, title and description in the payload and bring the total
    ``emit`` count on the same mock to three.
    """
    fetched = context.http.get("http://example.org/")
    payload = fetched.serialize()
    mocker.patch.object(context, "emit")

    store_rule = {"pattern": "https://httpbin.org/*"}
    meta_xpaths = {"title": ".//h1", "description": ".//p"}
    context.params["store"] = store_rule
    context.params["meta"] = meta_xpaths

    parse(context, payload)
    assert context.emit.call_count == 1
    context.emit.assert_called_once_with(rule="fetch", data=ANY)

    # cleanup tags
    tags.delete()

    context.http.result = None
    context.params["store"] = None
    parse(context, payload)

    assert payload["url"] == "https://www.iana.org/domains/example"
    assert payload["title"] == "Example Domain"
    assert payload["description"].startswith("This domain is for")

    # Cumulative count: the mock is not reset between the two passes.
    assert context.emit.call_count == 3, payload