Example #1
def test_build_condition_codes(monkeypatch):
    html_files_by_url = {
        SUUMO_TOKYO_SEARCH_URL: DATA_DIR / "chintai_tokyo_search_page.html"
    }
    monkeypatch.setattr("otokuna.dumping.requests.get",
                        build_mock_requests_get(html_files_by_url))

    expected = {"ts": ["1"], "sc": ["13102", "13113"], "tc": ["0401303"]}
    assert _build_condition_codes(["マンション"], ["中央区", "渋谷区"],
                                  ["本日の新着物件"]) == expected
Example #2
def test_build_condition_codes_invalid_value(monkeypatch):
    html_files_by_url = {
        SUUMO_TOKYO_SEARCH_URL: DATA_DIR / "chintai_tokyo_search_page.html"
    }
    monkeypatch.setattr("otokuna.dumping.requests.get",
                        build_mock_requests_get(html_files_by_url))

    expected_error_msg = "invalid values for condition sc: {'あいうえお区'}"
    with pytest.raises(RuntimeError, match=expected_error_msg):
        _build_condition_codes(wards=["あいうえお区"])
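Because match= is applied with re.search, a message containing regex metacharacters such as "{" is safer wrapped in re.escape (here it happens to match literally, since "{'...'}" is not a valid quantifier). A self-contained illustration; the _raise helper is purely hypothetical:

import re
import pytest

def _raise():
    raise RuntimeError("invalid values for condition sc: {'あいうえお区'}")

# re.escape keeps "{" and "}" from being read as regex syntax.
with pytest.raises(RuntimeError,
                   match=re.escape("invalid values for condition sc: {'あいうえお区'}")):
    _raise()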
Example #3
def test_iter_search_results(monkeypatch):
    html_files_by_url = {
        "dummyurl?page=1": DATA_DIR / "results_first_page.html",
        "dummyurl?page=2": DATA_DIR / "results_last_page.html"
    }
    monkeypatch.setattr("otokuna.dumping.requests.get",
                        build_mock_requests_get(html_files_by_url))
    monkeypatch.setattr("otokuna.dumping.time.sleep", lambda _: _)
    for page, response in iter_search_results("dummyurl", 2):
        pass
    assert page == 2
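From this test one can infer the rough shape of iter_search_results: a generator that fetches numbered result pages and yields (page, response) pairs. A sketch under that assumption; the real generator in otokuna.dumping may detect the last page itself or throttle differently:

import time
import requests

def iter_search_results(search_url, max_page):
    """Yield (page_number, response) for each results page.

    Sketch inferred from the test above, not the actual implementation.
    """
    for page in range(1, max_page + 1):
        response = requests.get(f"{search_url}?page={page}")
        yield page, response
        time.sleep(1)  # be polite to the server between requests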
Example #4
def test_main(set_environ, monkeypatch):
    # set_environ is a fixture from the suite; it sets OUTPUT_BUCKET
    # (and presumably activates a mocked S3 backend such as moto).
    output_bucket = os.environ["OUTPUT_BUCKET"]

    job_id = "someuuid"
    timestamp = 1611154415.0
    user_id = "johndoe"
    search_url = "dummyurl"
    root_key = f"jobs/{job_id}"
    raw_data_key = f"jobs/{job_id}/property_data.zip"
    scraped_data_key = f"jobs/{job_id}/property_data.pickle"
    prediction_data_key = f"jobs/{job_id}/prediction.pickle"

    expected_search_conditions = "東京メトロ銀座線/虎ノ門 東京メトロ丸ノ内線/銀座 1LDK 30m2以上 オートロック"

    html_files_by_url = {
        search_url: DATA_DIR / "results_page_long_conditions.html"
    }
    monkeypatch.setattr("save_job_info.requests.get",
                        build_mock_requests_get(html_files_by_url))

    event = {
        "root_key": root_key,
        "job_id": job_id,
        "timestamp": timestamp,
        "user_id": user_id,
        "search_url": search_url,
        "raw_data_key": raw_data_key,
        "scraped_data_key": scraped_data_key,
        "prediction_data_key": prediction_data_key,
    }

    # Create the destination bucket in the (mocked) S3 backend,
    # then run the handler under test.
    s3_client = boto3.client('s3')
    s3_client.create_bucket(Bucket=output_bucket)

    save_job_info.main(event, None)

    expected_job_info = {
        "job_id": job_id,
        "timestamp": timestamp,
        "user_id": user_id,
        "search_url": search_url,
        "search_conditions": expected_search_conditions,
        "raw_data_key": raw_data_key,
        "scraped_data_key": scraped_data_key,
        "prediction_data_key": prediction_data_key,
    }

    expected_job_info_key = f"jobs/{job_id}/job_info.json"
    key = "/".join([root_key, "job_info.json"])
    contents = s3_client.get_object(Bucket=output_bucket,
                                    Key=key)["Body"].read()
    assert json.loads(contents) == expected_job_info
    assert event["job_info_key"] == expected_job_info_key
Example #5
def test_build_search_url(monkeypatch):
    html_files_by_url = {
        SUUMO_TOKYO_SEARCH_URL: DATA_DIR / "chintai_tokyo_search_page.html"
    }
    monkeypatch.setattr("otokuna.dumping.requests.get",
                        build_mock_requests_get(html_files_by_url))

    search_url = build_search_url(building_categories=["マンション"],
                                  wards=["中央区", "渋谷区"],
                                  only_today=True)
    # The condition codes must appear in the URL's query string; the
    # subset check tolerates extra default parameters.
    expected_query = {
        "ts": ["1"],
        "sc": ["13102", "13113"],
        "tc": ["0401303"],
    }
    assert expected_query.items() <= parse_qs(urlparse(search_url).query,
                                              keep_blank_values=True).items()
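The closing assertion relies on dict items views comparing as sets, so <= reads as "is a sub-mapping of"; this works even with the unhashable list values that parse_qs produces:

from urllib.parse import parse_qs

query = parse_qs("ts=1&sc=13102&sc=13113&extra=x")
# Holds: every expected key/value pair occurs in the parsed query,
# regardless of extra parameters such as "extra".
assert {"ts": ["1"], "sc": ["13102", "13113"]}.items() <= query.items()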