示例#1
0
def test_parse(context, mocker):
    """Run ``parse`` twice against example.org and check emits and metadata.

    Phase one sets a ``store`` pattern rule plus ``meta`` XPaths and expects
    exactly one emit with ``rule="fetch"``. Phase two flushes redis, resets
    ``context.http.result`` and the ``store`` rule, runs ``parse`` again and
    then checks the url/title/description held in ``data`` as well as the
    cumulative emit count.
    """
    response = context.http.get("http://example.org/")
    data = response.serialize()

    # patch.object hands back the mock it installs as ``context.emit``.
    emit = mocker.patch.object(context, "emit")

    # Phase one: store rule and meta XPaths configured.
    context.params["store"] = {"pattern": "https://httpbin.org/*"}
    context.params["meta"] = {"title": ".//h1", "description": ".//p"}
    parse(context, data)
    assert emit.call_count == 1
    emit.assert_called_once_with(rule="fetch", data=ANY)

    # cleanup tags
    connect_redis().flushall()

    # Phase two: no store rule, HTTP result reset.
    context.http.result = None
    context.params["store"] = None
    parse(context, data)
    assert data["url"] == "https://www.iana.org/domains/example"
    assert data["title"] == "Example Domain"
    assert data["description"].startswith("This domain is for")
    # The mock is never reset, so this count includes the phase-one emit.
    assert emit.call_count == 3, data
def test_parse_ftm(context, mocker):
    """Check that ``parse`` fills ``data["properties"]`` for an Article schema.

    Fetches an OCCRP article, configures the ``schema`` and XPath
    ``properties`` params, runs ``parse`` and verifies the title, author and
    first description value found under ``data["properties"]``.
    """
    article_url = "https://www.occrp.org/en/daily/14082-riviera-maya-gang-members-sentenced-in-romania"
    data = context.http.get(article_url).serialize()

    # Property name -> XPath expression evaluated against the fetched page.
    property_xpaths = {
        "title": './/meta[@property="og:title"]/@content',
        "author": './/meta[@name="author"]/@content',
        "publishedAt": './/*[@class="date"]/text()',
        "description": './/meta[@property="og:description"]/@content',
    }
    context.params["schema"] = "Article"
    context.params["properties"] = property_xpaths

    parse(context, data)

    extracted = data["properties"]
    assert "Riviera Maya Gang Members Sentenced in Romania" in extracted["title"]
    assert "Attila Biro" in extracted["author"]
    first_description = extracted["description"][0]
    assert first_description.startswith("A Bucharest court")
示例#3
0
def test_parse(context, mocker):
    """Run ``parse`` twice against example.org and check the emit counts.

    Phase one sets a ``store`` pattern rule and expects a single emit with
    ``rule="fetch"`` carrying the resolved URL. Phase two flushes redis,
    resets ``context.http.result`` and the ``store`` rule, and expects two
    emits from the second ``parse`` call alone.
    """
    url = "http://example.org/"
    result = context.http.get(url)
    data = result.serialize()

    # mocker.patch.object applies the patch immediately and undoes it at test
    # teardown; pytest-mock does not support using it as a context manager,
    # so patch once and keep a handle on the installed mock.
    emit = mocker.patch.object(context, "emit")

    context.params["store"] = {"pattern": "https://httpbin.org/*"}
    parse(context, data)
    assert emit.call_count == 1
    emit.assert_called_once_with(
        rule="fetch", data={"url": "https://www.iana.org/domains/example"}
    )

    # cleanup tags
    conn = connect_redis()
    conn.flushall()

    # Clear the recorded calls so the next count covers phase two only.
    emit.reset_mock()
    context.http.result = None
    context.params["store"] = None
    parse(context, data)
    assert emit.call_count == 2, data
示例#4
0
def test_parse(context, mocker):
    """Run ``parse`` twice against example.org and check emits and metadata.

    The first run uses a ``store`` pattern rule and ``meta`` XPaths and must
    emit exactly once with ``rule="fetch"``. After ``tags.delete()`` and
    resetting ``context.http.result`` and the ``store`` rule, the second run
    is followed by checks on the url/title/description in ``data`` and on
    the cumulative emit count.
    """
    data = context.http.get("http://example.org/").serialize()

    mocker.patch.object(context, "emit")

    # First run: store rule and meta XPaths configured.
    context.params["store"] = {"pattern": "https://httpbin.org/*"}
    context.params["meta"] = {
        "title": ".//h1",
        "description": ".//p",
    }
    parse(context, data)
    assert context.emit.call_count == 1
    context.emit.assert_called_once_with(rule="fetch", data=ANY)

    # cleanup tags
    tags.delete()

    # Second run: no store rule, HTTP result reset.
    context.http.result = None
    context.params["store"] = None
    parse(context, data)

    exact_values = (
        ("url", "https://www.iana.org/domains/example"),
        ("title", "Example Domain"),
    )
    for key, expected in exact_values:
        assert data[key] == expected
    assert data["description"].startswith("This domain is for")
    # The mock is never reset, so this count includes the first run's emit.
    assert context.emit.call_count == 3, data