def test_main_happy_path(self, logger_setup, es_conn):
    """Alias already exists on both checks: the taxonomy mapping is
    written to the live index in place — no index creation, no alias
    staging, no delete."""
    logger = Mock()
    logger_setup.return_value = logger
    es = Mock()
    # Two alias-existence probes both report True -> in-place update path
    es.indices.exists_alias.side_effect = [True, True]
    es_conn.return_value = es

    argv = build_argv(self.optional)
    with captured_output(argv) as (out, err):
        sut.main()

    # Not creating indices
    es.indices.create.assert_not_called()
    # Not staging aliases
    es.indices.put_alias.assert_not_called()
    es.indices.delete.assert_not_called()
    # Directly writing the mapping
    es.indices.put_mapping.assert_called_once_with(index='peach-v1',
                                                   doc_type='taxonomy',
                                                   body=ANY)
    # Indexing
    es.index.assert_called_once_with(body=ANY, index='peach-v1',
                                     doc_type='taxonomy', id=1, refresh=True)

    logger.info.assert_any_call('Begin updating taxonomy for peach-v1')
    logger.info.assert_any_call('Indexing taxonomy...')
    logger.info.assert_any_call('Completed indexing taxonomy')
def test_verify_happy_path(self, boto3):
    """Verification happy path: the S3 object's content_length matches
    the downloaded JSON, and both size-tracking files are updated.

    Fix: replaced ``assertTrue(x == y)`` / ``assertTrue(x != y)`` with
    ``assertEqual`` / ``assertNotEqual`` so failures report both values
    instead of a bare "False is not true".
    """
    dataset = make_configargs({'content_length': 180})
    bucket = Mock()
    bucket.Object.return_value = dataset
    s3 = Mock()
    s3.Bucket.return_value = bucket
    boto3.resource.return_value = s3

    self.optional.insert(0, '--dump-config')
    argv = build_argv(self.optional, self.positional)
    with captured_output(argv) as (out, err):
        sut.main()

    # assert calls
    boto3.resource.assert_called_once_with('s3')
    s3.Bucket.assert_called_once_with('foo')

    # assert file size update
    with open(self.json_size_file, 'r') as f:
        prev_json_size = int(f.read())
    self.assertEqual(prev_json_size, 180)

    # assert cache size update
    with open(self.cache_size_file, 'r') as f:
        prev_cache_size = int(f.read())
    self.assertNotEqual(prev_cache_size, 0)
def test_main_happy_path_check_latest_no_new_data(self, boto3):
    """--check-latest with a local file as new as the S3 object: no
    download happens and the "no new data" message is printed.

    Bug fix: the original assertion was
    ``self.assertTrue(True, '<msg>' in out.getvalue())`` — the membership
    test was passed as the *failure message* argument, so the assertion
    always passed and never checked the output. Now asserts the message
    is actually present.
    """
    dataset = make_configargs({
        'last_modified': datetime.fromtimestamp(self.timestamp, pytz.utc)
    })
    bucket = Mock()
    bucket.Object.return_value = dataset
    s3 = Mock()
    s3.Bucket.return_value = bucket
    boto3.resource.return_value = s3

    self.optional.insert(0, '--check-latest')

    # Make sure the file exists and reflects the expected timestamp
    open(self.actual_file, 'a').close()
    os.utime(self.actual_file, (self.timestamp, self.timestamp))

    argv = build_argv(self.optional)
    with captured_output(argv) as (out, err):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3.Bucket.assert_called_once_with('foo')
    bucket.Object.assert_called_once_with('bar')
    self.assertIn('\nNo new data set since 08:00 ' +
                  'PM Sunday, September 08, 2019\n', out.getvalue())
def test_verify_json_file_invalid(self, boto3):
    """A size file with non-numeric content is treated as no previous
    size; verification still runs and rewrites the file with a number.

    Fixes: ``assertNotEqual`` instead of ``assertTrue(x != 0)`` for a
    useful failure message; removed the redundant ``str('Invalid')``;
    and the fixture restoration now runs in a ``finally`` block so the
    fixture is not left corrupted for other tests when the assertion
    fails.
    """
    invalid_count_file = \
        toAbsolute('__fixtures__/prev_json_size_invalid.txt')
    test_positional = [
        'json_data.json',
        invalid_count_file,
        self.cache_size_file
    ]
    dataset = make_configargs({'content_length': 1})
    bucket = Mock()
    bucket.Object.return_value = dataset
    s3 = Mock()
    s3.Bucket.return_value = bucket
    boto3.resource.return_value = s3

    argv = build_argv(self.optional, test_positional)
    with captured_output(argv) as (out, err):
        sut.main()

    try:
        # assert json size update
        with open(invalid_count_file, 'r') as f:
            prev_json_size = int(f.read())
        self.assertNotEqual(prev_json_size, 0)
    finally:
        # Clean up: restore the fixture's invalid content for reuse
        with open(invalid_count_file, 'w+') as f:
            f.write('Invalid')
def test_main_happy_path_json(self, boto3):
    """Uploading a .json source with --dump-config: the file is zipped,
    pushed to the bucket, and only the command-line args are echoed."""
    bucket = Mock()
    s3 = Mock()
    s3.Bucket.return_value = bucket
    boto3.resource.return_value = s3

    self.optional.insert(0, '--dump-config')
    self.positional = [
        toAbsolute('__fixtures__/from_s3.json')
    ]
    argv = build_argv(self.optional, self.positional)
    with captured_output(argv) as (out, err):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3.Bucket.assert_called_once_with('foo')
    bucket.upload_file.assert_called_once_with(
        'from_s3.json.zip', 'bar/from_s3.json.zip', Callback=ANY
    )

    # --dump-config shows only sections that contributed settings here
    console_output = out.getvalue()
    self.assertIn('Command Line Args:', console_output)
    self.assertNotIn('Defaults:', console_output)
    self.assertNotIn('Environment Variables:', console_output)
def test_main_happy_path(self):
    """End-to-end run with positional args only: output file matches the
    fixture and nothing is printed to stdout."""
    args = build_argv(positional=self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    validate_files(self.actual_file, fixtureToAbsolute('metadata.js'))
    self.assertEqual(stdout.getvalue(), '')
def test_callback(self):
    """ProgressPercentage emits a carriage-return progress line naming
    the input file and the running byte count."""
    opts = make_configargs({'infile': 'foo.bar'})
    progress = sut.ProgressPercentage(opts)

    with captured_output([]) as (stdout, stderr):
        progress(100)

    self.assertEqual(stdout.getvalue(), '\rfoo.bar 100 bytes')
def test_json(self):
    """JSON happy path: output matches the utf-8 fixture and the row
    count is reported."""
    args = build_argv(self.optional, self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    validate_json(self.actual_file, fixtureToAbsolute('utf-8.json'))
    self.assertEqual('2 rows processed', stdout.getvalue().strip())
def test_v2_public(self):
    """A v2 public CSV input selects the v2-json field mapping."""
    self.positional[0] = fixtureToAbsolute('v2-public.csv')
    args = build_argv(positional=self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    validate_files(self.actual_file, fieldsToAbsolute('v2-json.txt'))
    printed = stdout.getvalue().strip()
    self.assertEqual('Using "v2-json.txt" for field mapping', printed)
def test_switch_fields(self):
    """An explicit --fields file reorders the emitted columns."""
    self.optional.extend(
        ['--fields', fixtureToAbsolute('fields-good.txt')]
    )
    args = build_argv(self.optional, self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    validate_json(self.actual_file,
                  fixtureToAbsolute('utf-8-switched.json'))
def test_v1_intake_csv(self):
    """v1 intake CSV with a CSV target picks the v1-csv field mapping."""
    self.positional[0] = fixtureToAbsolute('v1-intake.csv')
    args = build_argv(['--target-format', 'CSV'], self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    validate_files(self.actual_file, fieldsToAbsolute('v1-csv.txt'))
    printed = stdout.getvalue().strip()
    self.assertEqual('Using "v1-csv.txt" for field mapping', printed)
def test_bad_input(self):
    """An unrecognized field set aborts with exit code 2 and an error
    on stderr."""
    self.positional[0] = fixtureToAbsolute('complaints-subset.csv')
    args = build_argv(positional=self.positional)

    with self.assertRaises(SystemExit) as ctx:
        with captured_output(args) as (stdout, stderr):
            sut.main()

    self.assertEqual(ctx.exception.code, 2)
    self.assertIn('Unknown field set', stderr.getvalue())
def test_main_happy_path(self):
    """Map build happy path: output matches fixture, skipped rows and the
    processed count are all reported."""
    args = build_argv(self.optional, self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    assert_output_equal(self.actual_file,
                        fixtureToAbsolute('exp_hero-map-3y.json'))

    printed = stdout.getvalue().strip()
    for fragment in ('Skipping "FOO"',
                     'Skipping ""',
                     '200 rows processed'):
        self.assertIn(fragment, printed)
def test_bad_input(self):
    """A non-JSON input file aborts with exit code 2, naming the file."""
    self.positional[0] = fixtureToAbsolute('exp_s3.ndjson')

    with self.assertRaises(SystemExit) as ctx:
        args = build_argv(positional=self.positional)
        with captured_output(args) as (stdout, stderr):
            sut.main()

    self.assertEqual(ctx.exception.code, 2)
    err_text = stderr.getvalue()
    self.assertIn('exp_s3.ndjson', err_text)
    self.assertIn('is not a valid JSON document', err_text)
def test_bad_format_argument(self):
    """argparse rejects an invalid --target-format choice with the usual
    usage message and exit code 2."""
    self.positional[0] = fixtureToAbsolute('v1-intake.csv')
    args = build_argv(['--target-format', 'tsv'], self.positional)

    with self.assertRaises(SystemExit) as ctx:
        with captured_output(args) as (stdout, stderr):
            sut.main()

    self.assertEqual(ctx.exception.code, 2)
    err_text = stderr.getvalue()
    self.assertIn('usage: choose_field_map', err_text)
    self.assertIn('--target-format: invalid choice', err_text)
def test_switch_fields_too_many(self):
    """A --fields file with more columns than expected aborts with exit
    code 2."""
    self.optional.extend(['--fields', fixtureToAbsolute('fields-bad.txt')])
    args = build_argv(self.optional, self.positional)

    with self.assertRaises(SystemExit) as ctx:
        with captured_output(args) as (stdout, stderr):
            sut.main()

    self.assertIn('has 4 fields. Expected 3', stderr.getvalue().strip())
    # assert exit code
    self.assertEqual(ctx.exception.code, 2)
def test_missing_attributes(self):
    """Metadata lacking required attributes aborts with exit code 1 and
    names the offending file."""
    self.positional[0] = fixtureToAbsolute('metadata-missing.json')

    with self.assertRaises(SystemExit) as ctx:
        args = build_argv(positional=self.positional)
        with captured_output(args) as (stdout, stderr):
            sut.main()

    self.assertEqual(ctx.exception.code, 1)
    err_text = stderr.getvalue()
    self.assertIn('metadata-missing.json', err_text)
    self.assertIn('is missing', err_text)
def test_cant_write(self):
    """An IOError while saving metadata exits with code 5 and reports
    the unwritable path."""
    failing_open = mock_open()
    failing_open.side_effect = IOError

    with self.assertRaises(SystemExit) as ctx:
        with patch.object(io, 'open', failing_open):
            with captured_output([]) as (stdout, stderr):
                sut.save_metadata({}, 'foo')

    self.assertEqual(ctx.exception.code, 5)
    self.assertIn("Unable to write 'foo'", stderr.getvalue())
def test_switch_fields_too_many(self):
    """Too many fields is only a warning here — processing completes and
    the output still matches the standard fixture."""
    self.optional.extend(
        ['--fields', fixtureToAbsolute('fields-bad.txt')]
    )
    args = build_argv(self.optional, self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    # Still processes!
    validate_json(self.actual_file, fixtureToAbsolute('utf-8.json'))
    self.assertIn('has 4 fields. Expected 3', stderr.getvalue().strip())
def test_main_happy_path_s3(self, logger_setup, es_conn, bulk, local_time):
    """Full index run from an S3-style ndjson dump plus metadata file:
    indices are created, data bulk-loaded into onion-v1, and the alias
    swapped onto it."""
    logger = Mock()
    logger_setup.return_value = logger
    es = Mock()
    # No existing alias -> create/stage/swap path
    es.indices.exists_alias.return_value = False
    es_conn.return_value = es
    # Record the generated bulk actions for later validation
    bulk.side_effect = self.capture_actions

    # GMT: Monday, September 9, 2019 4:00:00 AM
    # EDT: Monday, September 9, 2019 12:00:00 AM
    local_time.return_value = 1568001600

    self.optional[-1] = toAbsolute('__fixtures__/from_s3.ndjson')
    self.optional.append('--metadata')
    self.optional.append(toAbsolute('__fixtures__/metadata.json'))
    argv = build_argv(self.optional)
    with captured_output(argv) as (out, err):
        sut.main()

    # Expected index create calls
    es.indices.create.assert_any_call(index='onion-v1', ignore=400)
    es.indices.create.assert_any_call(index='onion-v2', ignore=400)
    es.indices.create.assert_any_call(index='onion-v1', body=ANY)

    # Expected index put_alias calls
    es.indices.put_alias.assert_called_once_with(name='onion',
                                                 index='onion-v1')

    # Expected index delete calls
    es.indices.delete.assert_called_once_with(index='onion-v1')

    # Bulk
    bulk.assert_called_once_with(es, actions=ANY, index='onion-v1',
                                 doc_type='complaint', chunk_size=20000,
                                 refresh=True)
    self.validate_actions(toAbsolute('__fixtures__/exp_s3.ndjson'))

    logger.info.assert_any_call('Deleting and recreating onion-v1')
    logger.info.assert_any_call(
        'Loading data into onion-v1 with doc_type complaint')
    logger.info.assert_any_call('chunk retrieved, now bulk load')
    logger.info.assert_any_call('1,001 records indexed, total = 1,001')
    logger.info.assert_any_call('Adding alias onion for index onion-v1')
def test_main_happy_path_download(self, boto3):
    """main() downloads object 'bar' from bucket 'foo' into the target
    file, reporting progress via a callback."""
    bucket = Mock()
    s3_resource = Mock()
    s3_resource.Bucket.return_value = bucket
    boto3.resource.return_value = s3_resource

    args = build_argv(self.optional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3_resource.Bucket.assert_called_once_with('foo')
    bucket.download_file.assert_called_once_with(
        'bar', self.actual_file, Callback=ANY
    )
def test_main_happy_path_ndjson(self, boto3):
    """main() zips the ndjson source and uploads it under the bucket's
    'bar/' prefix."""
    bucket = Mock()
    s3_resource = Mock()
    s3_resource.Bucket.return_value = bucket
    boto3.resource.return_value = s3_resource

    args = build_argv(self.optional, self.positional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3_resource.Bucket.assert_called_once_with('foo')
    bucket.upload_file.assert_called_once_with(
        'from_s3.ndjson.zip', 'bar/from_s3.ndjson.zip', Callback=ANY
    )
def test_main_dump_config(self, logger_setup, es_conn):
    """--dump-config causes the effective configuration to be logged at
    startup."""
    mock_logger = Mock()
    logger_setup.return_value = mock_logger
    mock_es = Mock()
    mock_es.indices.exists_alias.side_effect = [True, True]
    es_conn.return_value = mock_es

    self.optional.insert(0, '--dump-config')
    args = build_argv(self.optional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    mock_logger.info.assert_any_call('Running index_taxonomy with')
def test_main_happy_path_socrata(self, logger_setup, es_conn, bulk):
    """Full index run from a Socrata-style ndjson dump with --dump-config:
    indices created, data bulk-loaded into onion-v1, alias swapped."""
    logger = Mock()
    logger_setup.return_value = logger
    es = Mock()
    # No existing alias -> create/stage/swap path
    es.indices.exists_alias.return_value = False
    es_conn.return_value = es
    # Record the generated bulk actions for later validation
    bulk.side_effect = self.capture_actions

    self.optional.insert(0, '--dump-config')
    self.optional[-1] = toAbsolute('../../tests/__fixtures__/ccdb.ndjson')
    argv = build_argv(self.optional)
    with captured_output(argv) as (out, err):
        sut.main()

    # Expected index create calls
    es.indices.create.assert_any_call(index='onion-v1', ignore=400)
    es.indices.create.assert_any_call(index='onion-v2', ignore=400)
    es.indices.create.assert_any_call(index='onion-v1', body=ANY)

    # Expected index put_alias calls
    es.indices.put_alias.assert_called_once_with(name='onion',
                                                 index='onion-v1')

    # Expected index delete calls
    es.indices.delete.assert_called_once_with(index='onion-v1')

    # Bulk
    bulk.assert_called_once_with(es, actions=ANY, index='onion-v1',
                                 doc_type='complaint', chunk_size=20000,
                                 refresh=True)
    self.validate_actions(toAbsolute('__fixtures__/exp_socrata.ndjson'))

    logger.info.assert_any_call('Running index_ccdb with')
    logger.info.assert_any_call('Deleting and recreating onion-v1')
    logger.info.assert_any_call(
        'Loading data into onion-v1 with doc_type complaint')
    logger.info.assert_any_call('chunk retrieved, now bulk load')
    logger.info.assert_any_call('1,001 records indexed, total = 1,001')
    logger.info.assert_any_call('Adding alias onion for index onion-v1')
def test_verify_file_verify_failure(self, boto3):
    """A content-length that cannot match fails verification and exits
    with code 2."""
    dataset = make_configargs({'content_length': -1})
    bucket = Mock()
    bucket.Object.return_value = dataset
    s3_resource = Mock()
    s3_resource.Bucket.return_value = bucket
    boto3.resource.return_value = s3_resource

    with self.assertRaises(SystemExit) as ctx:
        args = build_argv(self.optional, self.positional)
        with captured_output(args) as (stdout, stderr):
            sut.main()

    # assert calls
    boto3.resource.assert_called_once_with('s3')
    s3_resource.Bucket.assert_called_once_with('foo')

    # assert exit code
    self.assertEqual(ctx.exception.code, 2)
def test_main_happy_path_download(self, boto3):
    """Download with --dump-config: the file is fetched and the config
    dump (args, defaults, --timezone) is echoed to stdout."""
    bucket = Mock()
    s3_resource = Mock()
    s3_resource.Bucket.return_value = bucket
    boto3.resource.return_value = s3_resource

    self.optional.insert(0, '--dump-config')
    args = build_argv(self.optional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3_resource.Bucket.assert_called_once_with('foo')
    bucket.download_file.assert_called_once_with(
        'bar', self.actual_file, Callback=ANY
    )

    printed = stdout.getvalue()
    self.assertIn('Command Line Args:', printed)
    self.assertIn('Defaults:', printed)
    self.assertIn('--timezone:', printed)
def test_main_transport_error(self, logger_setup, es_conn, bulk):
    """A TransportError during bulk load rolls the alias back to the
    previous index, logs one error, and exits."""
    from elasticsearch import TransportError

    mock_logger = Mock()
    logger_setup.return_value = mock_logger
    mock_es = Mock()
    mock_es.indices.exists_alias.return_value = False
    es_conn.return_value = mock_es
    bulk.side_effect = TransportError(404, 'oops')

    args = build_argv(self.optional)
    with captured_output(args) as (stdout, stderr):
        with self.assertRaises(SystemExit):
            sut.main()

    # Rollback
    mock_es.indices.put_alias.assert_called_once_with(name='onion',
                                                      index='onion-v2')
    self.assertEqual(mock_logger.error.call_count, 1)
def test_main_happy_path(self, ccdb_index, taxonomy_index, parser, es_ctor,
                         test_growing):
    """Orchestrator happy path: growth check, Socrata download/parse,
    CCDB index, then taxonomy index — all wired with the same ES client
    and the 'lizard' index/alias names."""
    es = Mock()
    es_ctor.return_value = es

    argv = build_argv(self.optional)
    with captured_output(argv) as (out, err):
        sut.main()

    test_growing.assert_called_once_with(es, 'lizard-v1')
    parser.assert_called_once_with(
        'https://data.consumerfinance.gov/api/views/s6ew-h6mp/rows.json',
        'complaints/ccdb/ccdb_output.json', ANY)
    ccdb_index.index_json_data.assert_called_once_with(
        es, ANY, 'complaint', 'complaints/settings.json',
        'complaints/ccdb/ccdb_mapping.json',
        'complaints/ccdb/ccdb_output.json',
        'lizard-v1', 'lizard-v2', 'lizard')
    taxonomy_index.index_taxonomy.assert_called_once_with(
        es, ANY, 'complaints/taxonomy/taxonomy.txt', 'lizard')
def test_main_happy_path_check_latest(self, boto3):
    """--check-latest downloads the object and stamps the local file
    with the S3 object's last_modified time."""
    dataset = make_configargs({
        'last_modified': datetime.fromtimestamp(self.timestamp, pytz.utc)
    })
    bucket = Mock()
    bucket.Object.return_value = dataset
    s3_resource = Mock()
    s3_resource.Bucket.return_value = bucket
    boto3.resource.return_value = s3_resource

    self.optional.insert(0, '--check-latest')
    args = build_argv(self.optional)
    with captured_output(args) as (stdout, stderr):
        sut.main()

    boto3.resource.assert_called_once_with('s3')
    s3_resource.Bucket.assert_called_once_with('foo')
    bucket.Object.assert_called_once_with('bar')

    # Local file mtime mirrors the remote object's timestamp
    self.assertEqual(self.timestamp, os.stat(self.actual_file).st_mtime)