def test_collection_diff(self) -> None:
    """Check how compute_collections_diff splits collections and importers.

    Four collections are created in the fake database and three importers are
    declared; the resulting diff must list the importer lacking a collection,
    the collection lacking an importer, and the collection present in both.
    """

    for collection_name in (
            'missing-in-importers',
            'in-both',
            'in-both.2018-12-12_57ccf5b6d9be6',
            'old-collection.2018-12-12_57ccf5b6d9be6',
    ):
        self.mongo_db.create_collection(collection_name)

    # One row per importer: (collection ID, importer name, is_imported).
    importer_specs = (
        ('no-import-needed', 'no import needed', False),
        ('missing-in-db-importer', 'missing in db', True),
        ('in-both', 'in both', True),
    )
    all_importers = {
        collection_id: importers.Importer(
            name=importer_name, script=None, args=None, is_imported=imported_flag,
            run_every=None, proto_type=None, key=None, has_pii=False)
        for collection_id, importer_name, imported_flag in importer_specs
    }

    diff = import_status.compute_collections_diff(all_importers, self.mongo_db)

    without_collection = set(diff.collection_missing)
    self.assertEqual({'missing-in-db-importer'}, without_collection)
    without_importer = set(diff.importer_missing)
    self.assertEqual({'missing-in-importers'}, without_importer)
    already_imported = set(diff.imported)
    self.assertEqual({'in-both'}, already_imported)
def test_details_no_import_needed(self, mock_log_info: mock.MagicMock) -> None:
    """Check the message logged for an importer that is not imported."""

    collection_id = 'no-import-needed'
    unneeded_importer = importers.Importer(
        name='no import needed',
        script=None,
        args=None,
        is_imported=False,
        run_every=None,
        proto_type=None,
        key=None,
        has_pii=False,
    )

    import_status.print_single_importer(unneeded_importer, collection_id, [])

    # The collection ID is expected to be logged colored in green.
    colored_id = termcolor.colored(collection_id, 'green')
    mock_log_info.assert_any_call('No import needed for %s', colored_id)
def test_details_basic_usage(self, mock_log_info: mock.MagicMock) -> None:
    """Check the command logged for an importer that has a script and args."""

    scripted_importer = importers.Importer(
        name='with command',
        script='run',
        args={'this': 'value'},
        is_imported=True,
        run_every=None,
        proto_type=None,
        key=None,
        has_pii=False,
    )

    import_status.print_single_importer(scripted_importer, 'foo', [])

    # Full command expected as the last logging argument.
    expected_command = (
        'docker-compose run --rm -e MONGO_URL="<your mongo URL>" data-analysis-prepare \\\n'
        ' python bob_emploi/data_analysis/importer/run.py \\\n'
        ' --this "value" \\\n --mongo_collection "foo"\n'
    )
    mock_log_info.assert_any_call(
        'To import "%s" in "%s", run:\n%s', 'with command', 'foo', expected_command)
associations=importers.IMPORTERS['associations'].updated_with_args( view='Export Bob UK'), best_jobs_in_area=importers.IMPORTERS['best_jobs_in_area']. updated_with_script(f'{_HERE}/best_jobs_in_area').updated_with_args( keep_old=False, geonames_admin_txt='data/uk/geonames_admin.txt', geonames_txt='data/uk/geonames.txt', salaries_by_region_2020_xls='data/uk/salaries_by_region_2020.xls', wards_ons_csv= 'data/uk/wards_counties_regions_local_authorities_2016.csv'), departements=importers.Importer( name='Basic information for UK local authorities', script=f'{_HERE}/local_authorities', args={ 'wards_counties_regions_local_authorities_csv': 'data/uk/wards_counties_regions_local_authorities_2016.csv', }, is_imported=True, run_every=None, proto_type=geo_pb2.Departement, key='Local Authority ID', has_pii=False), diagnostic_main_challenges=importers. IMPORTERS['diagnostic_main_challenges'].updated_with_args( view='Export Bob UK'), focus_emails=importers.IMPORTERS['focus_emails'].updated_with_args( view='Live in Bob UK'), local_diagnosis=importers.Importer( name='Local Diagnosis', script=f'{_HERE}/local_diagnosis', args={ 'postings_csv':
class ImportStatusBasicTests(unittest.TestCase):
    """Basic tests of the import_status module against a fake Mongo database."""

    def setUp(self) -> None:
        """Patch the Mongo client with mongomock and open the fake database."""

        patcher = mongomock.patch(_FAKE_MONGO_URL)
        patcher.start()
        # Make sure the patch is removed even if the test fails.
        self.addCleanup(patcher.stop)
        self.mongo_db = pymongo.MongoClient(_FAKE_MONGO_URL).get_database()

    def _find_log_call_matching(self, needle: str, logging_mock: mock.MagicMock) -> str:
        """Return the first formatted log message that contains needle.

        Each recorded call is rendered by applying its first positional argument
        as a %-format string to the remaining positional arguments. The test
        fails if no rendered message contains needle.
        """

        try:
            return next(
                flatten_log
                for call in logging_mock.call_args_list
                if needle in (
                    flatten_log := typing.cast(str, call[0][0] % call[0][1:])))
        except StopIteration:
            self.fail(
                f'Call not found including "{needle}": {logging_mock.call_args_list}'
            )

    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    @mongomock.patch(('test_url', ))
    def test_details_importer_missing(self) -> None:
        """Test missing importer."""

        with self.assertRaises(argparse.ArgumentError):
            import_status.main(['unknown_collection'])

    @mock.patch(logging.__name__ + '.info')
    def test_details_no_import_needed(self, mock_log_info: mock.MagicMock) -> None:
        """Test no import needed."""

        importer = importers.Importer(
            name='no import needed', script=None, args=None, is_imported=False,
            run_every=None, proto_type=None, key=None, has_pii=False)
        import_status.print_single_importer(importer, 'no-import-needed', [])
        mock_log_info.assert_any_call(
            'No import needed for %s',
            termcolor.colored('no-import-needed', 'green'))

    @mock.patch(logging.__name__ + '.info')
    def test_details_basic_usage(self, mock_log_info: mock.MagicMock) -> None:
        """Basic usage."""

        importer = importers.Importer(
            name='with command', script='run', args={'this': 'value'},
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False)
        import_status.print_single_importer(importer, 'foo', [])
        mock_log_info.assert_any_call(
            'To import "%s" in "%s", run:\n%s',
            'with command', 'foo',
            'docker-compose run --rm -e MONGO_URL="<your mongo URL>" data-analysis-prepare \\\n'
            ' python bob_emploi/data_analysis/importer/run.py \\\n'
            ' --this "value" \\\n --mongo_collection "foo"\n')

    def test_collection_diff(self) -> None:
        """Calculate the difference between mongo collections and importers."""

        self.mongo_db.create_collection('missing-in-importers')
        self.mongo_db.create_collection('in-both')
        self.mongo_db.create_collection('in-both.2018-12-12_57ccf5b6d9be6')
        self.mongo_db.create_collection(
            'old-collection.2018-12-12_57ccf5b6d9be6')
        all_importers = {
            'no-import-needed': importers.Importer(
                name='no import needed', script=None, args=None, is_imported=False,
                run_every=None, proto_type=None, key=None, has_pii=False),
            'missing-in-db-importer': importers.Importer(
                name='missing in db', script=None, args=None, is_imported=True,
                run_every=None, proto_type=None, key=None, has_pii=False),
            'in-both': importers.Importer(
                name='in both', script=None, args=None, is_imported=True,
                run_every=None, proto_type=None, key=None, has_pii=False)
        }
        diff = import_status.compute_collections_diff(all_importers, self.mongo_db)
        self.assertEqual({'missing-in-db-importer'}, set(diff.collection_missing))
        self.assertEqual({'missing-in-importers'}, set(diff.importer_missing))
        self.assertEqual({'in-both'}, set(diff.imported))

    def test_collection_meta(self) -> None:
        """Test basic usage of getting collection meta information."""

        two_days_ago = datetime.datetime.now() - datetime.timedelta(days=2)
        self.mongo_db.create_collection('meta').insert_one({
            '_id': 'test_collection',
            'updated_at': two_days_ago,
        })
        meta_info = import_status.get_meta_info(self.mongo_db)
        # The stored date is only required to fall within a one-second window.
        self.assertLessEqual(
            two_days_ago.replace(microsecond=0),
            meta_info['test_collection']['updated_at'])
        self.assertGreaterEqual(
            two_days_ago + datetime.timedelta(seconds=1),
            meta_info['test_collection']['updated_at'])

    @mock.patch(logging.__name__ + '.info')
    @mock.patch.dict(import_status.get_importers(), {
        'missing-in-db-importer': importers.Importer(
            name='missing in db', script=None, args=None, is_imported=True,
            run_every=None, proto_type=None, key=None, has_pii=False),
        'in-both': importers.Importer(
            name='in both', script=None, args=None, is_imported=True,
            run_every=None, proto_type=None, key=None, has_pii=False),
        'in-both-with-meta': importers.Importer(
            name='in both with meta', script=None, args=None, is_imported=True,
            run_every=None, proto_type=job_pb2.JobGroup, key=None, has_pii=False),
        'in-both-not-needed': importers.Importer(
            name='in both not needed', script=None, args=None, is_imported=False,
            run_every=None, proto_type=None, key=None, has_pii=False)
    }, clear=True)
    def test_main_function(self, mock_log_info: mock.MagicMock) -> None:
        """Basic usage."""

        self.mongo_db.create_collection('missing-in-importers')
        self.mongo_db.create_collection('in-both')
        self.mongo_db.create_collection('in-both-with-meta')
        two_days_ago = (
            datetime.datetime.now() - datetime.timedelta(days=2)).replace(microsecond=0)
        self.mongo_db.meta.insert_one({
            '_id': 'in-both-with-meta',
            'updated_at': two_days_ago,
        })
        self.mongo_db.create_collection('in-both-not-needed')

        import_status.main([])

        mock_log_info.assert_any_call(
            '%s collection%s without importers:', _AnyColorText('1'), ' is')
        mock_log_info.assert_any_call(
            '%s collection%s not imported yet:', _AnyColorText('1'), ' is')
        mock_log_info.assert_any_call(
            'Status report on imported collections (%d):', 3)
        mock_log_info.assert_any_call(
            '\t%s - %s - %s',
            _AnyColorText('in-both-not-needed'),
            _AnyColorText('in both not needed'),
            termcolor.colored('No import needed', 'green'))
        mock_log_info.assert_any_call(
            '\t%s - %s - %s',
            _AnyColorText('in-both'),
            _AnyColorText('in both'),
            termcolor.colored('Metainformation missing', 'red'))
        mock_log_info.assert_any_call(
            '\t%s - %s - %s',
            _AnyColorText('in-both-with-meta'),
            _AnyColorText('in both with meta (JobGroup)'),
            _AnyColorText(f'last import: {two_days_ago}'))

    @mock.patch(logging.__name__ + '.info')
    @mock.patch.dict(import_status.get_importers(), {
        'non-personal': importers.Importer(
            name='non personal', script=None, args=None, is_imported=True,
            run_every=None, proto_type=None, key=None, has_pii=False),
        'personal': importers.Importer(
            name='personal', script=None, args=None, is_imported=True,
            run_every=None, proto_type=None, key=None, has_pii=True),
        'personal-no-import': importers.Importer(
            name='personal not imported', script=None, args=None, is_imported=False,
            run_every=None, proto_type=None, key=None, has_pii=True)
    }, clear=True)
    def test_personal_database(self, mock_log_info: mock.MagicMock) -> None:
        """Check division between personal/non personal databases."""

        self.mongo_db.create_collection('non-personal')
        self.mongo_db.create_collection('personal')
        self.mongo_db.create_collection('personal-no-import')

        import_status.main([])

        mock_log_info.assert_any_call(
            '%s collection%s without importers:', _AnyColorText('1'), ' is')
        # Although non-personal is imported, it should not be as it's a Personal database.
        mock_log_info.assert_any_call(
            'The collection%s with missing importer%s: %s\n',
            '', ' is', _AnyColorText("{'non-personal'}"))

    @mock.patch(logging.__name__ + '.info')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_display_command(self, mock_log_info: mock.MagicMock) -> None:
        """Display the command to import a missing collection."""

        import_status.main([])

        mock_log_info.assert_any_call(
            '%s collection%s not imported yet:', _AnyColorText('1'), ' is')
        info_log = self._find_log_call_matching('To import "Collection name"', mock_log_info)
        self.assertIn(
            'To import "Collection name" in "collection_id", run:\ndocker-compose run --rm -e',
            info_log)
        self.assertNotIn('test_url/test_db', info_log)
        self.assertRegex(
            info_log,
            r'.*docker-compose run --rm .*data-analysis-prepare (\s|\\\n)*'
            r'python bob_emploi/data_analysis/importer/my-script-name\.py (\s|\\\n)*'
            r'--mongo_collection "collection_id"\n$')

    @mock.patch(logging.__name__ + '.info')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
        'not_displayed': importers.Importer(
            name='Unimportant name', script='other-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_display_command_for_specific_collection(
            self, mock_log_info: mock.MagicMock) -> None:
        """Display the command to import a specific collection."""

        self.mongo_db.create_collection('not_displayed')
        self.mongo_db.create_collection('collection_id')

        import_status.main(['collection_id'])

        mock_log_info.assert_called_once()
        # Render the single log call as the message a user would read.
        info_log = mock_log_info.call_args[0][0] % mock_log_info.call_args[0][1:]
        self.assertIn(
            'To import "Collection name" in "collection_id", run:\ndocker-compose run --rm -e',
            info_log)
        self.assertNotIn('test_url/test_db', info_log)
        self.assertRegex(
            info_log,
            r'.*docker-compose run --rm .*data-analysis-prepare (\s|\\\n)*'
            r'python bob_emploi/data_analysis/importer/my-script-name\.py (\s|\\\n)*'
            r'--mongo_collection "collection_id"\n$')

    def test_command_on_one_line(self) -> None:
        """Checks that all importers command are on one line."""

        # TODO(pascal): Check all deployments.
        for name, importer in import_status.get_importers().items():
            self.assertEqual(
                bool(importer.script), importer.is_imported,
                msg=f'Conflicts in script and is_imported field for {name}')
            if not importer.script:
                continue
            # assertNotIn takes (member, container): the newline is the member
            # to look for inside each part of the command.
            self.assertNotIn('\n', importer.script, msg=name)
            if importer.args:
                for key, value in importer.args.items():
                    self.assertNotIn('\n', key, msg=name)
                    self.assertNotIn(
                        '\n', value, msg=f'Importer "{name}", arg "{key}"')

    @mock.patch(logging.__name__ + '.info', new=mock.MagicMock())
    @mock.patch('subprocess.run')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name',
            args={'custom_importer_flag': 'value for custom flag'},
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_run_importer(self, mock_subprocess_run: mock.MagicMock) -> None:
        """Run the command to import a collection."""

        import_status.main(['--run', 'collection_id'])

        mock_subprocess_run.assert_called_once_with([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/my-script-name.py',
            '--custom_importer_flag', 'value for custom flag',
            '--mongo_collection', 'collection_id'
        ], stderr=subprocess.PIPE, check=True)

    @mock.patch(logging.__name__ + '.error')
    @mock.patch(
        'subprocess.run',
    )
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_run_importer_fails(
            self, mock_subprocess_run: mock.MagicMock,
            mock_log_error: mock.MagicMock) -> None:
        """Log an error when the command to import a collection fails."""

        mock_subprocess_run.side_effect = subprocess.CalledProcessError(
            2, ['the command'], stderr=b'the error')

        import_status.main(['--run', 'collection_id'])

        mock_subprocess_run.assert_called_once_with([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/my-script-name.py',
            '--mongo_collection', 'collection_id'
        ], stderr=subprocess.PIPE, check=True)
        mock_log_error.assert_any_call(
            'Could not import "%s":\nCommand run: %s\nError: %s',
            'collection_id', 'the command', 'the error')

    @mock.patch(logging.__name__ + '.info', new=mock.MagicMock())
    @mock.patch('subprocess.run')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name',
            args={'custom_importer_flag': 'value for custom flag'},
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
        'other_collection_id': importers.Importer(
            name='Other collection name', script='other-script-name',
            args={'custom_importer_flag': 'other value for custom flag'},
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_run_multiple_importers(
            self, mock_subprocess_run: mock.MagicMock) -> None:
        """Run the commands to import multiple collections."""

        import_status.main([
            '--run', 'collection_id',
            '--run', 'other_collection_id',
        ])

        self.assertEqual(2, mock_subprocess_run.call_count)
        mock_subprocess_run.assert_any_call([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/my-script-name.py',
            '--custom_importer_flag', 'value for custom flag',
            '--mongo_collection', 'collection_id'
        ], stderr=subprocess.PIPE, check=True)
        mock_subprocess_run.assert_any_call([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/other-script-name.py',
            '--custom_importer_flag', 'other value for custom flag',
            '--mongo_collection', 'other_collection_id'
        ], stderr=subprocess.PIPE, check=True)

    @mock.patch(logging.__name__ + '.info', new=mock.MagicMock())
    @mock.patch('subprocess.run')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name',
            args={'custom_importer_flag': 'value for custom flag'},
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_run_importer_with_extra_args(
            self, mock_subprocess_run: mock.MagicMock) -> None:
        """Run the command to import a collection with extra args forwarded."""

        import_status.main(['--run', 'collection_id', '--no_diff'])

        mock_subprocess_run.assert_called_once_with([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/my-script-name.py',
            '--custom_importer_flag', 'value for custom flag',
            '--mongo_collection', 'collection_id', '--no_diff'
        ], stderr=subprocess.PIPE, check=True)

    @mock.patch(logging.__name__ + '.info')
    @mock.patch('subprocess.run')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name',
            args={
                'needed_data': 'data/my_target',
            },
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_run_importer_with_make_target(
            self, mock_subprocess_run: mock.MagicMock,
            mock_log_info: mock.MagicMock) -> None:
        """Run the command to import a collection with a target to be made."""

        import_status.main(['--make_data', '--run', 'collection_id'])

        mock_subprocess_run.assert_any_call(
            ['make', 'data/my_target'],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
        mock_subprocess_run.assert_any_call([
            'python', f'{_BOB_EMPLOI_DIR}/data_analysis/importer/my-script-name.py',
            '--needed_data', 'data/my_target',
            '--mongo_collection', 'collection_id'
        ], stderr=subprocess.PIPE, check=True)
        mock_log_info.assert_any_call(
            'To make the data file(s) needed by %s importer, run:\n%s',
            'Collection name',
            'make \\\n'
            ' data/my_target\n')

    @mock.patch(logging.__name__ + '.error')
    @mock.patch('subprocess.run')
    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name',
            args={
                'needed_data': 'data/my_target',
            },
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    def test_importer_with_make_target_fails(
            self, mock_subprocess_run: mock.MagicMock,
            mock_log_error: mock.MagicMock) -> None:
        """Log an error and run nothing else when making the data target fails."""

        fake_command = [
            'python', 'my-folder/my-script.py', '--long-argument', 'value',
            '--other-arg', 'other-value'
        ]
        mock_subprocess_run.side_effect = subprocess.CalledProcessError(
            2, fake_command, stderr=b'the error')

        import_status.main(['--make_data', '--run', 'collection_id'])

        # Only the make command is run: the import itself is not attempted.
        mock_subprocess_run.assert_called_once_with(
            ['make', 'data/my_target'],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=True)
        mock_log_error.assert_any_call(
            'Could not make "%s":\nCommand run: %s\nError: %s',
            'data/my_target',
            'python my-folder/my-script.py --long-argument value \\\n --other-arg other-value',
            'the error')

    @mock.patch(logging.__name__ + '.info', new=mock.MagicMock())
    def test_main_unknown_extra_args(self) -> None:
        """Unknown arg."""

        with self.assertRaises(argparse.ArgumentError):
            import_status.main(['--no_diff'])

    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    @mock.patch(logging.__name__ + '.info')
    def test_revert_import(self, mock_log_info: mock.MagicMock) -> None:
        """Reverting a collection for which there is an archive."""

        self.mongo_db.collection_id.insert_many([{'_id': i} for i in range(10)])
        self.mongo_db.get_collection(
            'collection_id.2019-03-20_5784037a837ed',
        ).insert_many([{'_id': i} for i in range(10, 20)])
        self.mongo_db.get_collection(
            'collection_id.2019-03-18_45830e7a865fa',
        ).insert_many([{'_id': i} for i in range(20, 30)])

        import_status.main(['--revert', 'collection_id'])

        # The archive dated 2019-03-20 is the one expected to be restored.
        mock_log_info.assert_called_with(
            'Reverting collection "%s" to version from %s…',
            'collection_id', '2019-03-20')
        self.assertEqual(
            list(range(10, 20)),
            [doc['_id'] for doc in self.mongo_db.collection_id.find({})])
        self.assertNotIn(
            'collection_id.2019-03-20_5784037a837ed',
            self.mongo_db.list_collection_names())

    @mock.patch.dict(import_status.get_importers(), {
        'collection_id': importers.Importer(
            name='Collection name', script='my-script-name', args=None,
            is_imported=True, run_every=None, proto_type=None, key=None, has_pii=False),
    }, clear=True)
    @mock.patch(logging.__name__ + '.error')
    def test_revert_missing_archive(self, mock_log_error: mock.MagicMock) -> None:
        """Do nothing when reverting a collection without archive."""

        self.mongo_db.collection_id.insert_many([{'_id': i} for i in range(10)])

        import_status.main(['--revert', 'collection_id'])

        mock_log_error.assert_called_once()
        self.assertIn('collection_id', mock_log_error.call_args[0])
        self.assertEqual(
            list(range(10)),
            [doc['_id'] for doc in self.mongo_db.collection_id.find({})])
view='Export Bob US'), diagnostic_main_challenges=importers. IMPORTERS['diagnostic_main_challenges'].updated_with_args( view='Export Bob US'), focus_emails=importers.IMPORTERS['focus_emails'].updated_with_args( view='Live in Bob US'), jobboards=importers.IMPORTERS['jobboards'].updated_with_args( view='Export Bob US'), local_diagnosis=importers.Importer( name='Local Diagnosis', script=f'{_HERE}/local_diagnosis', args={ 'hires_csv': 'data/usa/emsi_hires.csv', 'job_seekers_csv': 'data/usa/emsi_job_seekers_counts_dec_2019.csv', 'carreer_changers_tsv': f'data/usa/onet_{_ONET_VERSION}/Career_Changers_Matrix.txt', 'soc_definition_xls': 'data/usa/soc/soc_2010_definitions.xls', }, is_imported=True, run_every='30 days', proto_type=job_pb2.LocalJobStats, key='<County FIPS ID>:<job group ID>', has_pii=False), job_group_info=importers.Importer( name='Job Group Info', script=f'{_HERE}/job_group_info', args={ 'application_mode_csv': 'data/imt/application_modes.csv', 'hires_csv': 'data/usa/emsi_hires.csv',