Exemplo n.º 1
0
 def test_address_processor(self):
     """Check that the processor chain replaces link-style addresses.

     The dummy crawler is created with ``addresses_as_links=True``; its raw
     results must carry addresses starting with ``http``, and after running
     them through a chain built with ``resolve_addresses`` none may.
     """
     link_crawler = DummyCrawler(addresses_as_links=True)
     config = Config(string=self.DUMMY_CONFIG)
     raw_exposes = link_crawler._get_results("https://www.example.com/search")
     # Precondition: the crawler output is still unresolved.
     for raw in raw_exposes:
         self.assertTrue(raw['address'].startswith('http'), "Expected addresses not yet to be processed")
     builder = ProcessorChain.builder(config)
     builder = builder.resolve_addresses([link_crawler])
     chain = builder.build()
     resolved_exposes = chain.process(raw_exposes)
     # Postcondition: every address has been swapped for a non-URL value.
     for resolved in resolved_exposes:
         self.assertFalse(resolved['address'].startswith('http'), "Expected addresses to be processed")
Exemplo n.º 2
0
 def test_addresses_are_processed_by_hunter(self):
     """Check that a plain ``Hunter`` run yields no link-style addresses.

     The crawler emits addresses as links; the exposes returned by
     ``hunt_flats`` must no longer have an address starting with ``http``.
     """
     config = Config(string=self.DUMMY_CONFIG)
     crawlers = [DummyCrawler(addresses_as_links=True)]
     id_watch = IdMaintainer(":memory:")
     hunter = Hunter(config, crawlers, id_watch)
     exposes = hunter.hunt_flats()
     found = count(exposes)
     self.assertTrue(found > 4, "Expected to find exposes")
     for item in exposes:
         address = item['address']
         self.assertFalse(address.startswith('http'), "Expected addresses to be processed by default")
Exemplo n.º 3
0
def test_ids_are_added_to_maintainer(mocker):
    """Check that a hunt calls ``mark_processed`` on the id maintainer.

    A spy is attached to ``IdMaintainer.mark_processed`` before hunting; the
    test expects more than four exposes and exactly 24 spy calls.
    """
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    maintainer = IdMaintainer(":memory:")
    mark_spy = mocker.spy(maintainer, "mark_processed")
    crawlers = [DummyCrawler()]
    hunter = Hunter(config, crawlers, maintainer)
    found = hunter.hunt_flats()
    assert count(found) > 4
    assert mark_spy.call_count == 24
Exemplo n.º 4
0
def test_is_processed_works(mocker):
    """Check that every hunted expose id is reported as processed.

    ``mocker`` is accepted but not used by the body.
    """
    config = Config(string=IdMaintainerTest.DUMMY_CONFIG)
    maintainer = IdMaintainer(":memory:")
    crawlers = [DummyCrawler()]
    hunter = Hunter(config, crawlers, maintainer)
    found = hunter.hunt_flats()
    assert count(found) > 4
    # Stops at the first id the maintainer does not know, like a per-item assert.
    assert all(maintainer.is_processed(item['id']) for item in found)
Exemplo n.º 5
0
def test_exposes_are_returned_with_limit():
    """Check that ``get_recent_exposes(10)`` returns exactly ten exposes.

    The first returned expose must also have a non-``None`` title.
    """
    limit = 10
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    crawlers = [DummyCrawler()]
    Hunter(config, crawlers, maintainer).hunt_flats()
    recent = maintainer.get_recent_exposes(limit)
    assert len(recent) == limit
    first = recent[0]
    assert first['title'] is not None
Exemplo n.º 6
0
def test_exposes_are_saved_to_maintainer():
    """Check that the maintainer stores more exposes than the hunt returns.

    Exposes saved within the last ten seconds are fetched from the
    maintainer; there must be at least one, and strictly more than the
    number of exposes returned by ``hunt_flats``.
    """
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    crawlers = [DummyCrawler()]
    hunter = Hunter(config, crawlers, maintainer)
    hunted = hunter.hunt_flats()
    assert count(hunted) > 4
    window = datetime.timedelta(seconds=10)
    cutoff = datetime.datetime.now() - window
    stored = maintainer.get_exposes_since(cutoff)
    assert len(stored) > 0
    assert count(hunted) < len(stored)
Exemplo n.º 7
0
def test_exposes_are_returned_as_dictionaries():
    """Check that saved exposes can be read by key.

    After a hunt, the first expose saved within the last ten seconds must
    expose non-``None`` ``'title'`` and ``'created_at'`` entries.
    """
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    crawlers = [DummyCrawler()]
    Hunter(config, crawlers, maintainer).hunt_flats()
    window = datetime.timedelta(seconds=10)
    cutoff = datetime.datetime.now() - window
    stored = maintainer.get_exposes_since(cutoff)
    assert len(stored) > 0
    first = stored[0]
    assert first['title'] is not None
    assert first['created_at'] is not None
Exemplo n.º 8
0
def test_exposes_are_returned_filtered():
    """Check that ``get_recent_exposes`` honours a max-size filter set.

    The hunter is run twice, then ten recent exposes are requested with a
    filter built via ``max_size_filter(70)``; each returned expose's leading
    integer in ``'size'`` must be at most 70.
    """
    config = Config(string=IdMaintainerTest.CONFIG_WITH_FILTERS)
    maintainer = IdMaintainer(":memory:")
    crawlers = [DummyCrawler()]
    hunter = Hunter(config, crawlers, maintainer)
    hunter.hunt_flats()
    hunter.hunt_flats()
    # Named so that the builtin ``filter`` is not shadowed.
    size_filter = Filter.builder().max_size_filter(70).build()
    recent = maintainer.get_recent_exposes(10, filter_set=size_filter)
    assert len(recent) == 10
    for item in recent:
        leading_digits = re.match(r'\d+', item['size'])[0]
        assert int(leading_digits) <= 70
Exemplo n.º 9
0
 def test_filter_min_size(self):
     """Check that no hunted expose is smaller than the minimum size.

     Hunts with ``FILTER_MIN_SIZE_CONFIG`` and fails if any returned expose
     has a numeric size below ``min_size``. The value 80 is assumed to
     mirror the threshold inside that config -- confirm against the fixture.
     """
     min_size = 80
     config = Config(string=self.FILTER_MIN_SIZE_CONFIG)
     hunter = Hunter(config, [DummyCrawler()], IdMaintainer(":memory:"))
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")

     def size_of(expose):
         # The pattern accepts "," as a decimal separator, which float()
         # rejects with ValueError, so normalise it to "." first.
         number = re.search(r'\d+([\.,]\d+)?', expose['size'])[0]
         return float(number.replace(',', '.'))

     unfiltered = [expose for expose in exposes if size_of(expose) < min_size]
     # Print the offenders so a failing run shows which exposes slipped through.
     for expose in unfiltered:
         print("Got unfiltered expose: ", expose)
     self.assertTrue(
         len(unfiltered) == 0, "Expected small flats to be filtered")
Exemplo n.º 10
0
 def test_filter_max_price(self):
     """Check that no hunted expose costs more than the maximum price.

     Hunts with ``FILTER_MAX_PRICE_CONFIG`` and fails if any returned expose
     has a numeric price above ``max_price``. The value 1000 is assumed to
     mirror the threshold inside that config -- confirm against the fixture.
     """
     max_price = 1000
     config = Config(string=self.FILTER_MAX_PRICE_CONFIG)
     hunter = Hunter(config, [DummyCrawler()], IdMaintainer(":memory:"))
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")

     def price_of(expose):
         # The pattern accepts "," as a decimal separator, which float()
         # rejects with ValueError, so normalise it to "." first.
         # NOTE(review): thousands separators ("1.000,50") are still read
         # as "1.000" by this pattern -- confirm the price format in use.
         number = re.search(r'\d+([\.,]\d+)?', expose['price'])[0]
         return float(number.replace(',', '.'))

     unfiltered = [
         expose for expose in exposes if price_of(expose) > max_price]
     # Print the offenders so a failing run shows which exposes slipped through.
     for expose in unfiltered:
         print("Got unfiltered expose: ", expose)
     self.assertTrue(
         len(unfiltered) == 0, "Expected expensive flats to be filtered")
Exemplo n.º 11
0
 def test_filter_titles(self):
     """Check that no hunted expose title contains a filtered word.

     The dummy crawler is given a word list for its titles; after hunting
     with ``FILTER_TITLES_CONFIG`` no returned title may contain any of the
     words listed in ``blocked_words``.
     """
     title_words = ["wg", "tausch", "flat", "ruhig", "gruen"]
     blocked_words = [
         "wg", "tausch", "wochenendheimfahrer", "pendler", "zwischenmiete"
     ]
     config = Config(string=self.FILTER_TITLES_CONFIG)
     crawlers = [DummyCrawler(title_words)]
     id_watch = IdMaintainer(":memory:")
     hunter = Hunter(config, crawlers, id_watch)
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")
     unfiltered = [
         item for item in exposes
         if any(word in item['title'] for word in blocked_words)
     ]
     # Print the offenders so a failing run shows which titles slipped through.
     for item in unfiltered:
         print("Got unfiltered expose: ", item)
     self.assertTrue(not unfiltered, "Expected words to be filtered")
 def test_resolve_durations(self, m):
     """Check that every hunted expose carries a ``'durations'`` entry.

     ``m`` is used to stub GET requests whose URL matches the distance-matrix
     pattern with a canned JSON reply (presumably a ``requests_mock`` mocker
     injected by a decorator outside this view -- confirm).
     """
     config = Config(string=self.DUMMY_CONFIG)
     hunter = Hunter(config, [DummyCrawler()], IdMaintainer(":memory:"))
     distance_api = re.compile(
         'maps.googleapis.com/maps/api/distancematrix/json')
     canned_reply = '{"status": "OK", "rows": [ { "elements": [ { "distance": { "text": "far", "value": 123 }, "duration": { "text": "days", "value": 123 } } ] } ]}'
     m.get(distance_api, text=canned_reply)
     exposes = hunter.hunt_flats()
     self.assertTrue(count(exposes) > 4, "Expected to find exposes")
     without_durations = [
         item for item in exposes if 'durations' not in item]
     # Print the offenders so a failing run shows which exposes lack durations.
     for item in without_durations:
         print("Got expose: ", item)
     self.assertTrue(
         not without_durations, "Expected durations to be calculated")