Пример #1
0
    def test_find_scraperConfigByUrl(self):
        """find(scraperConfigByUrl=...) returns the config stored for each URL.

        Every scenario is looked up in the configuration at
        TEST_VALID_CONFIGURATION_PATH and runs as its own sub-test, so a
        failing scenario is reported individually and does not prevent the
        remaining ones from being checked.
        """
        # Given
        dao = TinyConfigDao(path=TEST_VALID_CONFIGURATION_PATH)
        # Expect data of the fixture file, keyed by the scraper URL to look up
        scenarios = (
            (
                "https://www.test_scraper_config_01.com",
                ScraperConfig(
                    iterSleepFromScnds=8,
                    iterSleepToScnds=15,
                    iterSleepSteps=0.5,
                    fetchTimeoutScnds=8,
                    fetchMaxRetries=4,
                    fetchUseRandomProxy=True,
                    postTimeoutScnds=7,
                    postMaxRetries=3,
                    postUseRandomProxies=True),
            ),
            (
                "https://www.test_scraper_config_02.com",
                ScraperConfig(
                    iterSleepFromScnds=7,
                    iterSleepToScnds=16,
                    iterSleepSteps=1.0,
                    fetchTimeoutScnds=5,
                    fetchMaxRetries=5,
                    fetchUseRandomProxy=False,
                    postTimeoutScnds=9,
                    postMaxRetries=2,
                    postUseRandomProxies=False),
            ),
        )

        for scraperUrl, expectedConfig in scenarios:
            with self.subTest(scraperUrl=scraperUrl):
                # When
                # The dao context is entered anew for every lookup.
                with dao as sut:
                    foundConfig = sut.find(scraperConfigByUrl=scraperUrl)
                # Then
                self.assertEqual(expectedConfig, foundConfig)
Пример #2
0
    def test_init(self):
        """ScraperConfig can be built from a complete set of keyword arguments.

        Any exception raised by the constructor is reported as a test failure
        with an explanatory message.
        """
        # Given
        initArgs = {
            "iterSleepFromScnds": 1,
            "iterSleepToScnds": 2,
            "iterSleepSteps": 0.3,
            "fetchTimeoutScnds": 10,
            "fetchMaxRetries": 5,
            "fetchUseRandomProxy": False,
            "postTimeoutScnds": 6,
            "postMaxRetries": 3,
            "postUseRandomProxies": True,
        }

        # When / Then
        try:
            ScraperConfig(**initArgs)
        except Exception as e:
            self.fail("Expected initialization of ScraperConfig to run without "
                      f"errors, but raised {e}")
Пример #3
0
    def test_findScraperCommonConfig(self):
        """findScraperCommonConfig() returns the expected common scraper config."""
        # Given
        repo = ConfigRepo(dao=self.testConfigDao)
        # Expect data of the fixture file
        fixtureValues = {
            "iterSleepFromScnds": 25,
            "iterSleepToScnds": 35,
            "iterSleepSteps": 1.0,
            "fetchTimeoutScnds": 8,
            "fetchMaxRetries": 5,
            "fetchUseRandomProxy": True,
            "postTimeoutScnds": 7,
            "postMaxRetries": 4,
            "postUseRandomProxies": True,
        }
        expected = ScraperConfig(**fixtureValues)

        # When
        actual = repo.findScraperCommonConfig()

        # Then
        self.assertEqual(expected, actual)
Пример #4
0
    def test_findScraperConfigByUrl(self):
        """findScraperConfigByUrl(url=...) returns the config expected for that URL."""
        # Given
        repo = ConfigRepo(dao=self.testConfigDao)
        lookupUrl = "https://www.test_scraper_config_01.com"
        # Expect data of the fixture file
        fixtureValues = {
            "iterSleepFromScnds": 8,
            "iterSleepToScnds": 15,
            "iterSleepSteps": 0.5,
            "fetchTimeoutScnds": 8,
            "fetchMaxRetries": 4,
            "fetchUseRandomProxy": True,
            "postTimeoutScnds": 7,
            "postMaxRetries": 3,
            "postUseRandomProxies": True,
        }
        expected = ScraperConfig(**fixtureValues)

        # When
        actual = repo.findScraperConfigByUrl(url=lookupUrl)

        # Then
        self.assertEqual(expected, actual)
Пример #5
0
    def test_find_scraperCommonConfig_shouldFallbackToRescueDefaultsIfNotFound(self):
        """find(scraperCommonConfig=True) falls back to the rescue defaults.

        Uses the configuration at TEST_EMPTY_DATABASE_CONFIGURATION_PATH and
        compares the result against the hard-coded values listed below.
        """
        # Given
        emptyDao = TinyConfigDao(path=TEST_EMPTY_DATABASE_CONFIGURATION_PATH)
        # Expect hard coded data
        rescueValues = {
            "iterSleepFromScnds": 20,
            "iterSleepToScnds": 30,
            "iterSleepSteps": 0.5,
            "fetchTimeoutScnds": 8,
            "fetchMaxRetries": 4,
            "fetchUseRandomProxy": True,
            "postTimeoutScnds": 8,
            "postMaxRetries": 4,
            "postUseRandomProxies": True,
        }
        expected = ScraperConfig(**rescueValues)

        # When
        with emptyDao as sut:
            actual = sut.find(scraperCommonConfig=True)

        # Then
        self.assertEqual(expected, actual)
Пример #6
0
    def test_find_scraperCommonConfig(self):
        """find(scraperCommonConfig=True) returns the expected common config.

        Reads from the configuration at TEST_VALID_CONFIGURATION_PATH.
        """
        # Given
        validDao = TinyConfigDao(path=TEST_VALID_CONFIGURATION_PATH)
        # Expect data of the fixture file
        fixtureValues = {
            "iterSleepFromScnds": 25,
            "iterSleepToScnds": 35,
            "iterSleepSteps": 1.0,
            "fetchTimeoutScnds": 8,
            "fetchMaxRetries": 5,
            "fetchUseRandomProxy": True,
            "postTimeoutScnds": 7,
            "postMaxRetries": 4,
            "postUseRandomProxies": True,
        }
        expected = ScraperConfig(**fixtureValues)

        # When
        with validDao as sut:
            actual = sut.find(scraperCommonConfig=True)

        # Then
        self.assertEqual(expected, actual)
Пример #7
0
    def test_find_scraperConfigByUrl_ShouldFallbackToPersistentDefault(self):
        """find(scraperConfigByUrl=...) with an unknown URL yields the default config.

        Reads from the configuration at TEST_VALID_CONFIGURATION_PATH and looks
        up a URL that is expected to have no dedicated entry there.
        """
        # Given
        validDao = TinyConfigDao(path=TEST_VALID_CONFIGURATION_PATH)
        unknownUrl = "https://www.this-scraper-default-does-not-exist.org"
        # Expect data of the fixture file
        fixtureDefaults = {
            "iterSleepFromScnds": 25,
            "iterSleepToScnds": 35,
            "iterSleepSteps": 1.0,
            "fetchTimeoutScnds": 8,
            "fetchMaxRetries": 5,
            "fetchUseRandomProxy": True,
            "postTimeoutScnds": 7,
            "postMaxRetries": 4,
            "postUseRandomProxies": True,
        }
        expected = ScraperConfig(**fixtureDefaults)

        # When
        with validDao as sut:
            actual = sut.find(scraperConfigByUrl=unknownUrl)
        # Then
        self.assertEqual(expected, actual)