def test_main_happy_path(self, logger_setup, es_conn):
        """Run main() with both alias checks answering True.

        Expects no index creation, alias staging or deletion; a single
        put_mapping and a single index call against 'peach-v1'; and the
        three progress messages on the logger.
        """
        mock_logger = Mock()
        logger_setup.return_value = mock_logger

        es_client = Mock()
        es_client.indices.exists_alias.side_effect = [True, True]
        es_conn.return_value = es_client

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        indices = es_client.indices

        # Not creating indices
        indices.create.assert_not_called()

        # Not staging aliases
        indices.put_alias.assert_not_called()
        indices.delete.assert_not_called()

        # Directly writing the mapping
        indices.put_mapping.assert_called_once_with(
            index='peach-v1', doc_type='taxonomy', body=ANY)

        # Indexing
        es_client.index.assert_called_once_with(
            body=ANY, index='peach-v1', doc_type='taxonomy', id=1,
            refresh=True)

        for message in ('Begin updating taxonomy for peach-v1',
                        'Indexing taxonomy...',
                        'Completed indexing taxonomy'):
            mock_logger.info.assert_any_call(message)
    def test_verify_happy_path(self, boto3):
        """Run main() against a mocked S3 object of content_length 180.

        Expects one boto3 resource lookup for 's3', one Bucket lookup for
        'foo', the JSON size file to contain 180 afterwards and the cache
        size file to contain a non-zero value.
        """
        dataset = make_configargs({'content_length': 180})
        bucket = Mock()
        bucket.Object.return_value = dataset

        s3 = Mock()
        s3.Bucket.return_value = bucket
        boto3.resource.return_value = s3

        self.optional.insert(0, '--dump-config')
        argv = build_argv(self.optional, self.positional)
        with captured_output(argv) as (out, err):
            sut.main()

        # assert calls
        boto3.resource.assert_called_once_with('s3')
        s3.Bucket.assert_called_once_with('foo')

        # assert file size update
        with open(self.json_size_file, 'r') as f:
            prev_json_size = int(f.read())

        # assertEqual reports both values on failure; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(prev_json_size, 180)

        # assert cache size update
        with open(self.cache_size_file, 'r') as f:
            prev_cache_size = int(f.read())

        self.assertNotEqual(prev_cache_size, 0)
    def test_main_happy_path_check_latest_no_new_data(self, boto3):
        """Run main() with --check-latest when the local file is current.

        The mocked S3 object's last_modified and the local file's mtime are
        both set from self.timestamp. Expects the usual S3 lookups and the
        "No new data set" message on stdout.
        """
        dataset = make_configargs({
            'last_modified': datetime.fromtimestamp(self.timestamp, pytz.utc)
        })
        bucket = Mock()
        bucket.Object.return_value = dataset

        s3 = Mock()
        s3.Bucket.return_value = bucket

        boto3.resource.return_value = s3

        self.optional.insert(0, '--check-latest')

        # Make sure the file exists and reflects the expected timestamp
        with open(self.actual_file, 'a'):
            pass
        os.utime(self.actual_file, (self.timestamp, self.timestamp))

        argv = build_argv(self.optional)
        with captured_output(argv) as (out, err):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3.Bucket.assert_called_once_with('foo')
        bucket.Object.assert_called_once_with('bar')

        # The previous assertTrue(True, <containment check>) could never
        # fail: the check was passed as the failure message. Assert the
        # containment itself.
        # NOTE(review): the expected text encodes a local time of day, so
        # it presumably depends on the timezone the run uses - confirm.
        self.assertIn('\nNo new data set since 08:00 ' +
                      'PM Sunday, September 08, 2019\n',
                      out.getvalue())
    def test_verify_json_file_invalid(self, boto3):
        """Run main() when the previous-JSON-size file holds invalid text.

        Uses the 'prev_json_size_invalid.txt' fixture as the size file and
        expects it to contain a non-zero integer after the run. The fixture
        is restored to 'Invalid' afterwards, even if the run or the
        assertion fails, so later runs start from the same state.
        """
        invalid_count_file = \
            toAbsolute('__fixtures__/prev_json_size_invalid.txt')

        test_positional = [
            'json_data.json', invalid_count_file, self.cache_size_file
        ]

        dataset = make_configargs({'content_length': 1})
        bucket = Mock()
        bucket.Object.return_value = dataset

        s3 = Mock()
        s3.Bucket.return_value = bucket
        boto3.resource.return_value = s3

        argv = build_argv(self.optional, test_positional)
        try:
            with captured_output(argv) as (out, err):
                sut.main()

            # assert json size update
            with open(invalid_count_file, 'r') as f:
                prev_json_size = int(f.read())

            self.assertNotEqual(prev_json_size, 0)
        finally:
            # Clean up: always put the fixture back in its invalid state
            with open(invalid_count_file, 'w') as f:
                f.write('Invalid')
示例#5
0
    def test_main_happy_path_json(self, boto3):
        """Run main() on the JSON fixture with --dump-config.

        Expects one upload_file call for the zipped fixture and a config
        dump on stdout that lists only the command line arguments.
        """
        s3_bucket = Mock()
        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        self.optional.insert(0, '--dump-config')
        self.positional = [toAbsolute('__fixtures__/from_s3.json')]

        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')
        s3_bucket.upload_file.assert_called_once_with(
            'from_s3.json.zip', 'bar/from_s3.json.zip', Callback=ANY)

        printed = stdout.getvalue()
        self.assertIn('Command Line Args:', printed)
        for absent_heading in ('Defaults:', 'Environment Variables:'):
            self.assertNotIn(absent_heading, printed)
    def test_main_happy_path(self):
        """Run main() on the positional arguments only.

        The output file must match the 'metadata.js' fixture and stdout
        must stay empty.
        """
        cli_args = build_argv(positional=self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fixtureToAbsolute('metadata.js')
        validate_files(self.actual_file, expected_file)

        self.assertEqual(stdout.getvalue(), '')
示例#7
0
    def test_callback(self):
        """Call a ProgressPercentage built for 'foo.bar' with 100.

        Expects the progress line for 100 bytes on stdout.
        """
        progress = sut.ProgressPercentage(
            make_configargs({'infile': 'foo.bar'}))

        with captured_output([]) as (stdout, stderr):
            progress(100)

        printed = stdout.getvalue()
        self.assertEqual(printed, '\rfoo.bar  100 bytes')
示例#8
0
    def test_json(self):
        """Run main() and compare the output with the 'utf-8.json' fixture.

        Also expects stdout, once stripped, to be '2 rows processed'.
        """
        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fixtureToAbsolute('utf-8.json')
        validate_json(self.actual_file, expected_file)

        printed = stdout.getvalue().strip()
        self.assertEqual('2 rows processed', printed)
示例#9
0
    def test_v2_public(self):
        """Run main() on 'v2-public.csv' with no optional arguments.

        The output file must match the 'v2-json.txt' field file and the
        chosen mapping must be announced on stdout.
        """
        self.positional[0] = fixtureToAbsolute('v2-public.csv')

        cli_args = build_argv(positional=self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fieldsToAbsolute('v2-json.txt')
        validate_files(self.actual_file, expected_file)

        printed = stdout.getvalue().strip()
        self.assertEqual('Using "v2-json.txt" for field mapping', printed)
    def test_switch_fields(self):
        """Run main() with --fields pointing at 'fields-good.txt'.

        The output must match the 'utf-8-switched.json' fixture.
        """
        fields_file = fixtureToAbsolute('fields-good.txt')
        self.optional.extend(['--fields', fields_file])

        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fixtureToAbsolute('utf-8-switched.json')
        validate_json(self.actual_file, expected_file)
示例#11
0
    def test_v1_intake_csv(self):
        """Run main() on 'v1-intake.csv' with --target-format CSV.

        The output file must match the 'v1-csv.txt' field file and the
        chosen mapping must be announced on stdout.
        """
        self.positional[0] = fixtureToAbsolute('v1-intake.csv')
        format_args = ['--target-format', 'CSV']

        cli_args = build_argv(format_args, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fieldsToAbsolute('v1-csv.txt')
        validate_files(self.actual_file, expected_file)

        printed = stdout.getvalue().strip()
        self.assertEqual('Using "v1-csv.txt" for field mapping', printed)
示例#12
0
    def test_bad_input(self):
        """Run main() on 'complaints-subset.csv' and expect exit code 2.

        stderr must mention 'Unknown field set'.
        """
        self.positional[0] = fixtureToAbsolute('complaints-subset.csv')
        cli_args = build_argv(positional=self.positional)

        with self.assertRaises(SystemExit) as caught:
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        self.assertEqual(caught.exception.code, 2)

        error_text = stderr.getvalue()
        self.assertIn('Unknown field set', error_text)
    def test_main_happy_path(self):
        """Run main() and compare the output with 'exp_hero-map-3y.json'.

        stdout must report the two skipped values and '200 rows processed'.
        """
        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        expected_file = fixtureToAbsolute('exp_hero-map-3y.json')
        assert_output_equal(self.actual_file, expected_file)

        printed = stdout.getvalue().strip()
        for fragment in ('Skipping "FOO"',
                         'Skipping ""',
                         '200 rows processed'):
            self.assertIn(fragment, printed)
示例#14
0
    def test_bad_input(self):
        """Run main() on 'exp_s3.ndjson' and expect exit code 2.

        stderr must name the file and say it is not a valid JSON document.
        """
        self.positional[0] = fixtureToAbsolute('exp_s3.ndjson')

        with self.assertRaises(SystemExit) as caught:
            cli_args = build_argv(positional=self.positional)
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        self.assertEqual(caught.exception.code, 2)

        error_text = stderr.getvalue()
        for fragment in ('exp_s3.ndjson', 'is not a valid JSON document'):
            self.assertIn(fragment, error_text)
示例#15
0
    def test_bad_format_argument(self):
        """Run main() with --target-format tsv and expect exit code 2.

        stderr must contain the usage line and the invalid-choice message.
        """
        self.positional[0] = fixtureToAbsolute('v1-intake.csv')
        format_args = ['--target-format', 'tsv']
        cli_args = build_argv(format_args, self.positional)

        with self.assertRaises(SystemExit) as caught:
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        self.assertEqual(caught.exception.code, 2)

        error_text = stderr.getvalue()
        for fragment in ('usage: choose_field_map',
                         '--target-format: invalid choice'):
            self.assertIn(fragment, error_text)
    def test_switch_fields_too_many(self):
        """Run main() with --fields pointing at 'fields-bad.txt'.

        Expects the field-count complaint on stderr and exit code 2.
        """
        fields_file = fixtureToAbsolute('fields-bad.txt')
        self.optional.extend(['--fields', fields_file])
        cli_args = build_argv(self.optional, self.positional)

        with self.assertRaises(SystemExit) as caught:
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        error_text = stderr.getvalue().strip()
        self.assertIn('has 4 fields.  Expected 3', error_text)

        # assert exit code
        self.assertEqual(caught.exception.code, 2)
示例#17
0
    def test_missing_attributes(self):
        """Run main() on 'metadata-missing.json' and expect exit code 1.

        stderr must name the file and contain 'is missing'.
        """
        self.positional[0] = fixtureToAbsolute('metadata-missing.json')

        with self.assertRaises(SystemExit) as caught:
            cli_args = build_argv(positional=self.positional)
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        self.assertEqual(caught.exception.code, 1)

        error_text = stderr.getvalue()
        for fragment in ('metadata-missing.json', 'is missing'):
            self.assertIn(fragment, error_text)
示例#18
0
    def test_cant_write(self):
        """Call save_metadata() while io.open raises IOError.

        Expects exit code 5 and an "Unable to write" message on stderr.
        """
        failing_open = mock_open()
        failing_open.side_effect = IOError

        with self.assertRaises(SystemExit) as caught:
            with patch.object(io, 'open', failing_open), \
                    captured_output([]) as (stdout, stderr):
                sut.save_metadata({}, 'foo')

        self.assertEqual(caught.exception.code, 5)

        error_text = stderr.getvalue()
        self.assertIn("Unable to write 'foo'", error_text)
示例#19
0
    def test_switch_fields_too_many(self):
        """Run main() with --fields pointing at 'fields-bad.txt'.

        The run is expected to finish: the output must still match the
        'utf-8.json' fixture, with the field-count complaint on stderr.
        """
        fields_file = fixtureToAbsolute('fields-bad.txt')
        self.optional.extend(['--fields', fields_file])

        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        # Still processes!
        expected_file = fixtureToAbsolute('utf-8.json')
        validate_json(self.actual_file, expected_file)

        error_text = stderr.getvalue().strip()
        self.assertIn('has 4 fields.  Expected 3', error_text)
    def test_main_happy_path_s3(self, logger_setup, es_conn, bulk, local_time):
        """Run main() on the S3 NDJSON fixture with a metadata file.

        exists_alias is mocked to return False and local_time to return a
        fixed epoch. Expects the index create, put_alias, delete and bulk
        calls listed below, the captured actions to match 'exp_s3.ndjson',
        and the progress messages on the logger.
        """
        mock_logger = Mock()
        logger_setup.return_value = mock_logger

        es_client = Mock()
        es_client.indices.exists_alias.return_value = False
        es_conn.return_value = es_client

        bulk.side_effect = self.capture_actions

        # GMT: Monday, September 9, 2019 4:00:00 AM
        # EDT: Monday, September 9, 2019 12:00:00 AM
        local_time.return_value = 1568001600

        self.optional[-1] = toAbsolute('__fixtures__/from_s3.ndjson')
        self.optional.append('--metadata')
        self.optional.append(toAbsolute('__fixtures__/metadata.json'))

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        indices = es_client.indices

        # Expected index create calls
        indices.create.assert_any_call(index='onion-v1', ignore=400)
        indices.create.assert_any_call(index='onion-v2', ignore=400)
        indices.create.assert_any_call(index='onion-v1', body=ANY)

        # Expected index put_alias calls
        indices.put_alias.assert_called_once_with(
            name='onion', index='onion-v1')

        # Expected index delete calls
        indices.delete.assert_called_once_with(index='onion-v1')

        # Bulk
        bulk.assert_called_once_with(
            es_client, actions=ANY, index='onion-v1', doc_type='complaint',
            chunk_size=20000, refresh=True)

        self.validate_actions(toAbsolute('__fixtures__/exp_s3.ndjson'))

        for message in ('Deleting and recreating onion-v1',
                        'Loading data into onion-v1 with doc_type complaint',
                        'chunk retrieved, now bulk load',
                        '1,001 records indexed, total = 1,001',
                        'Adding alias onion for index onion-v1'):
            mock_logger.info.assert_any_call(message)
示例#21
0
    def test_main_happy_path_download(self, boto3):
        """Run main() with the optional arguments against a mocked S3.

        Expects one download_file call for key 'bar' into self.actual_file.
        """
        s3_bucket = Mock()
        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')
        s3_bucket.download_file.assert_called_once_with(
            'bar', self.actual_file, Callback=ANY)
示例#22
0
    def test_main_happy_path_ndjson(self, boto3):
        """Run main() with the default arguments against a mocked S3.

        Expects one upload_file call for 'from_s3.ndjson.zip'.
        """
        s3_bucket = Mock()
        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        cli_args = build_argv(self.optional, self.positional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')
        s3_bucket.upload_file.assert_called_once_with(
            'from_s3.ndjson.zip', 'bar/from_s3.ndjson.zip', Callback=ANY)
示例#23
0
    def test_main_dump_config(self, logger_setup, es_conn):
        """Run main() with --dump-config prepended to the options.

        Expects the logger to receive 'Running index_taxonomy with'.
        """
        mock_logger = Mock()
        logger_setup.return_value = mock_logger

        es_client = Mock()
        es_client.indices.exists_alias.side_effect = [True, True]
        es_conn.return_value = es_client

        self.optional.insert(0, '--dump-config')
        cli_args = build_argv(self.optional)

        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        mock_logger.info.assert_any_call('Running index_taxonomy with')
    def test_main_happy_path_socrata(self, logger_setup, es_conn, bulk):
        """Run main() on the 'ccdb.ndjson' fixture with --dump-config.

        exists_alias is mocked to return False. Expects the index create,
        put_alias, delete and bulk calls listed below, the captured actions
        to match 'exp_socrata.ndjson', and the progress messages on the
        logger.
        """
        mock_logger = Mock()
        logger_setup.return_value = mock_logger

        es_client = Mock()
        es_client.indices.exists_alias.return_value = False
        es_conn.return_value = es_client

        bulk.side_effect = self.capture_actions

        self.optional.insert(0, '--dump-config')
        self.optional[-1] = toAbsolute('../../tests/__fixtures__/ccdb.ndjson')

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        indices = es_client.indices

        # Expected index create calls
        indices.create.assert_any_call(index='onion-v1', ignore=400)
        indices.create.assert_any_call(index='onion-v2', ignore=400)
        indices.create.assert_any_call(index='onion-v1', body=ANY)

        # Expected index put_alias calls
        indices.put_alias.assert_called_once_with(
            name='onion', index='onion-v1')

        # Expected index delete calls
        indices.delete.assert_called_once_with(index='onion-v1')

        # Bulk
        bulk.assert_called_once_with(
            es_client, actions=ANY, index='onion-v1', doc_type='complaint',
            chunk_size=20000, refresh=True)

        self.validate_actions(toAbsolute('__fixtures__/exp_socrata.ndjson'))

        for message in ('Running index_ccdb with',
                        'Deleting and recreating onion-v1',
                        'Loading data into onion-v1 with doc_type complaint',
                        'chunk retrieved, now bulk load',
                        '1,001 records indexed, total = 1,001',
                        'Adding alias onion for index onion-v1'):
            mock_logger.info.assert_any_call(message)
    def test_verify_file_verify_failure(self, boto3):
        """Run main() against a mocked S3 object of content_length -1.

        Expects the usual S3 lookups and exit code 2.
        """
        s3_object = make_configargs({'content_length': -1})
        s3_bucket = Mock()
        s3_bucket.Object.return_value = s3_object

        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        with self.assertRaises(SystemExit) as caught:
            cli_args = build_argv(self.optional, self.positional)
            with captured_output(cli_args) as (stdout, stderr):
                sut.main()

        # assert calls
        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')

        # assert exit code
        self.assertEqual(caught.exception.code, 2)
    def test_main_happy_path_download(self, boto3):
        """Run main() with --dump-config against a mocked S3.

        Expects one download_file call for key 'bar' into self.actual_file
        and a config dump on stdout that includes the command line
        arguments, the defaults and the --timezone entry.
        """
        s3_bucket = Mock()
        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        self.optional.insert(0, '--dump-config')

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')
        s3_bucket.download_file.assert_called_once_with(
            'bar', self.actual_file, Callback=ANY)

        printed = stdout.getvalue()
        for heading in ('Command Line Args:', 'Defaults:', '--timezone:'):
            self.assertIn(heading, printed)
    def test_main_transport_error(self, logger_setup, es_conn, bulk):
        """Run main() while the bulk helper raises TransportError(404).

        Expects SystemExit, a single put_alias call pointing 'onion' at
        'onion-v2', and exactly one logger.error call.
        """
        from elasticsearch import TransportError

        mock_logger = Mock()
        logger_setup.return_value = mock_logger

        es_client = Mock()
        es_client.indices.exists_alias.return_value = False
        es_conn.return_value = es_client

        bulk.side_effect = TransportError(404, 'oops')

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr), \
                self.assertRaises(SystemExit):
            sut.main()

        # Rollback
        es_client.indices.put_alias.assert_called_once_with(
            name='onion', index='onion-v2')

        self.assertEqual(mock_logger.error.call_count, 1)
    def test_main_happy_path(self, ccdb_index, taxonomy_index, parser, es_ctor,
                             test_growing):
        """Run main() with every collaborator mocked.

        Expects one call each to test_growing, parser,
        ccdb_index.index_json_data and taxonomy_index.index_taxonomy, with
        the 'lizard' index names and the file paths listed below.
        """
        es_client = Mock()
        es_ctor.return_value = es_client

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        test_growing.assert_called_once_with(es_client, 'lizard-v1')

        source_url = \
            'https://data.consumerfinance.gov/api/views/s6ew-h6mp/rows.json'
        output_file = 'complaints/ccdb/ccdb_output.json'
        parser.assert_called_once_with(source_url, output_file, ANY)

        ccdb_index.index_json_data.assert_called_once_with(
            es_client, ANY, 'complaint', 'complaints/settings.json',
            'complaints/ccdb/ccdb_mapping.json', output_file,
            'lizard-v1', 'lizard-v2', 'lizard')

        taxonomy_index.index_taxonomy.assert_called_once_with(
            es_client, ANY, 'complaints/taxonomy/taxonomy.txt', 'lizard')
    def test_main_happy_path_check_latest(self, boto3):
        """Run main() with --check-latest against a mocked S3 object.

        The object's last_modified is built from self.timestamp. Expects the
        usual S3 lookups and the local file's mtime to equal self.timestamp.
        """
        last_modified = datetime.fromtimestamp(self.timestamp, pytz.utc)
        s3_object = make_configargs({'last_modified': last_modified})

        s3_bucket = Mock()
        s3_bucket.Object.return_value = s3_object

        s3_resource = Mock()
        s3_resource.Bucket.return_value = s3_bucket
        boto3.resource.return_value = s3_resource

        self.optional.insert(0, '--check-latest')

        cli_args = build_argv(self.optional)
        with captured_output(cli_args) as (stdout, stderr):
            sut.main()

        boto3.resource.assert_called_once_with('s3')
        s3_resource.Bucket.assert_called_once_with('foo')
        s3_bucket.Object.assert_called_once_with('bar')

        modified_at = os.stat(self.actual_file).st_mtime
        self.assertEqual(self.timestamp, modified_at)