def test_requires(self, connect_s3_mock):
        """Assert that requires() yields an external URL for each expected path under each source.

        The mocked S3 connection is set up so that listing a bucket returns one
        fake key per entry of SAMPLE_KEY_PATHS.
        """
        class FakeKey(object):
            """Minimal stand-in for the key objects boto returns when listing an S3 bucket."""
            def __init__(self, path):
                self.key, self.size = path, 10

        # Any bucket fetched through the mocked connection lists the sample keys.
        listed_keys = []
        for sample_path in self.SAMPLE_KEY_PATHS:
            listed_keys.append(FakeKey(sample_path))
        connect_s3_mock.return_value.get_bucket.return_value.list.return_value = listed_keys

        selection_task = EventLogSelectionTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'],
            expand_interval=datetime.timedelta(0),
        )

        required = selection_task.requires()

        # Every expected path should be required once per configured source.
        expected_urls = []
        for matched_path in (
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ):
            for source_prefix in self.SOURCE:
                expected_urls.append(UncheckedExternalURL(source_prefix + matched_path))

        self.assertItemsEqual(required, expected_urls)
    def test_filtering_of_urls(self):
        """Assert that a single FakeServerGroup pattern matches only the two paths listed below."""
        server_group_pattern = r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz'
        no_expansion = datetime.timedelta(0)

        selection_task = EventLogSelectionTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=[server_group_pattern],
            expand_interval=no_expansion,
        )

        matched_paths = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
        ]
        self.assert_only_matched(selection_task, matched_paths)
    def test_multiple_filtering_of_urls(self):
        """Assert that supplying several patterns matches only the four paths listed below."""
        patterns = [
            r'.*?FakeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*?FakeEdgeServerGroup/tracking.log-(?P<date>\d{8}).*\.gz',
            r'.*tracking_\d{3,5}\.log\.gz$',
        ]
        no_expansion = datetime.timedelta(0)

        selection_task = EventLogSelectionTask(
            source=self.SOURCE,
            interval=Month.parse('2014-03'),
            pattern=patterns,
            expand_interval=no_expansion,
        )

        matched_paths = [
            'FakeServerGroup/tracking.log-20140318.gz',
            'FakeServerGroup/tracking.log-20140319-1395256622.gz',
            'FakeEdgeServerGroup/tracking.log-20140324-1395670621.gz',
            'FakeOldServerGroup3/tracking_14602.log.gz',
        ]
        self.assert_only_matched(selection_task, matched_paths)
 def test_default_pattern(self):
     """Assert the value of ``pattern`` when the task is built without one.

     NOTE(review): the expected tuple presumably comes from configuration set
     up outside this method (e.g. a decorator or fixture) -- confirm.
     """
     task = EventLogSelectionTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(task.pattern, (
         r'.*tracking.log-(?P<date>\d{8}).*\.gz',
         r'.*tracking.notalog-(?P<date>\d{8}).*\.gz',
     ))
 def test_default_source(self):
     """Assert the value of ``source`` when the task is built without one.

     NOTE(review): the two fake S3 prefixes presumably come from configuration
     set up outside this method -- confirm.
     """
     task = EventLogSelectionTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(task.source,
                      ('s3://fake/input/', 's3://fake/input2/'))
 def test_pattern_override(self):
     """Assert that an explicitly passed ``pattern`` list is exposed as a tuple of the same items."""
     task = EventLogSelectionTask(interval=Month.parse('2014-03'),
                                  pattern=['baz'])
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(task.pattern, ('baz', ))
 def test_pattern_from_config(self):
     """Assert that ``pattern`` is ``('foobar', )`` when the task is built without one.

     NOTE(review): the 'foobar' value presumably comes from configuration
     injected outside this method (e.g. a decorator or fixture) -- confirm.
     """
     task = EventLogSelectionTask(interval=Month.parse('2014-03'))
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(task.pattern, ('foobar', ))