def test_no_overwrite(self):
    """
    Completion of the task should be driven by whether its Hive partition target exists.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    with patch('edx.analytics.tasks.database_imports.HivePartitionTarget') as mock_target:
        output = mock_target()
        # Make MagicMock act more like a regular mock, so that flatten() does the right thing.
        del output.__iter__
        del output.__getitem__

        # When the partition is absent the task is incomplete; once present it is complete.
        output.exists = Mock(return_value=False)
        self.assertFalse(task.complete())
        self.assertTrue(output.exists.called)

        output.exists = Mock(return_value=True)
        self.assertTrue(task.complete())
        self.assertTrue(output.exists.called)
def test_no_overwrite(self):
    """
    Without overwrite, task completion should mirror the existence of the Hive partition.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    target_path = 'edx.analytics.tasks.database_imports.HivePartitionTarget'
    with patch(target_path) as mock_target:
        output = mock_target()
        # Make MagicMock act more like a regular mock, so that flatten() does the right thing.
        del output.__iter__
        del output.__getitem__

        output.exists = Mock(return_value=False)
        self.assertFalse(task.complete())
        self.assertTrue(output.exists.called)
        output.exists = Mock(return_value=True)
        self.assertTrue(task.complete())
        self.assertTrue(output.exists.called)
def test_query_with_date_interval(self):
    """
    The generated Hive query should create the external last_country_of_user table
    and add a partition for the interval's date.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    query = task.query()
    # NOTE(review): the exact line layout of this literal was reconstructed from a
    # whitespace-mangled source — confirm against the actual query() output.
    expected_query = textwrap.dedent("""
        USE default;
        DROP TABLE IF EXISTS last_country_of_user;
        CREATE EXTERNAL TABLE last_country_of_user (
            country_name STRING,country_code STRING,username STRING
        )
        PARTITIONED BY (dt STRING)
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        LOCATION 's3://output/path';
        ALTER TABLE last_country_of_user ADD PARTITION (dt = '2014-01-01');
        """)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(query, expected_query)
def test_query_with_date_interval(self):
    """
    The Hive query should recreate the last_country_of_user table and register the
    partition corresponding to the task's date interval.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    query = task.query()
    # NOTE(review): the exact line layout of this literal was reconstructed from a
    # whitespace-mangled source — confirm against the actual query() output.
    expected_query = textwrap.dedent("""
        USE default;
        DROP TABLE IF EXISTS last_country_of_user;
        CREATE EXTERNAL TABLE last_country_of_user (
            country_name STRING,country_code STRING,username STRING
        )
        PARTITIONED BY (dt STRING)
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        LOCATION 's3://output/path';
        ALTER TABLE last_country_of_user ADD PARTITION (dt = '2014-01-01');
        """)
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(query, expected_query)
def requires(self):
    """
    Require the Hive import of per-user country data, forwarding this task's
    map-reduce and source configuration unchanged.
    """
    task_kwargs = {
        'mapreduce_engine': self.mapreduce_engine,
        'n_reduce_tasks': self.n_reduce_tasks,
        'source': self.source,
        'interval': self.interval,
        'pattern': self.pattern,
        'geolocation_data': self.geolocation_data,
        'overwrite': self.overwrite,
    }
    return ImportLastCountryOfUserToHiveTask(**task_kwargs)
def requires(self):
    """
    This task reads from auth_user, auth_user_profile, and last_country_of_user,
    so require that they be loaded into Hive (via MySQL loads into Hive or via
    the pipeline as needed).
    """
    country_task = ImportLastCountryOfUserToHiveTask(
        overwrite=self.overwrite,
        interval=self.interval,
        user_country_output=self.user_country_output,
        n_reduce_tasks=self.n_reduce_tasks,
    )
    return [
        ImportAuthUserTask(overwrite=self.overwrite, destination=self.warehouse_path),
        ImportAuthUserProfileTask(overwrite=self.overwrite, destination=self.warehouse_path),
        country_task,
    ]
def test_requires(self):
    """
    The required upstream task should write to the expected dated partition path.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    required_task = task.requires()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(required_task.output().path, 's3://output/path/dt=2014-01-01')
def test_overwrite(self):
    """
    When overwrite is requested, the task should never report itself complete.
    """
    # Copy the default kwargs with overwrite forced on.
    overwrite_kwargs = dict(self._get_kwargs(), overwrite=True)
    task = ImportLastCountryOfUserToHiveTask(**overwrite_kwargs)
    self.assertFalse(task.complete())
def test_requires(self):
    """
    The required upstream task should write under the warehouse path in the
    expected dated partition directory.
    """
    task = ImportLastCountryOfUserToHiveTask(**self._get_kwargs())
    required_task = task.requires()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(
        required_task.output().path,
        's3://fake/warehouse/last_country_of_user/dt=2014-01-01')