Example #1
    def test_manual_fetch_instructions_with_real_data(self):
        path = os.path.join(settings.APPS_ROOT, 'pipeline', 'metadata')
        with override_settings(PIPELINE_METADATA_DIR=path):
            tasks = load_tasks()

        # We're just checking that no exceptions get raised here
        for task in tasks.by_type('manual_fetch'):
            task.manual_fetch_instructions()
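These snippets are test methods lifted from a Django test case and omit their module header. A minimal sketch of the imports they appear to rely on follows; the location of load_tasks is an assumption inferred from the mock.patch targets below, and build_path (used in the setUp further down) is assumed to be a sibling helper in the same package.

    import json
    import os
    from unittest import mock

    from django.conf import settings
    from django.test import TestCase, override_settings

    # Inferred from the 'pipeline.runner.Task.unimported_paths' patch
    # target used in the tests below; the real module layout may differ.
    from pipeline.runner import load_tasks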
Example #2
    def test_run_real_tasks(self):
        # We're not actually going to run the management commands, but we're
        # going to check that the management commands exist and can be run with
        # the given input
        path = os.path.join(settings.APPS_ROOT, 'pipeline', 'metadata')
        with override_settings(PIPELINE_METADATA_DIR=path):
            tasks = load_tasks()

        with mock.patch('django.core.management.base.BaseCommand.execute'):
            for task in tasks.by_type('auto_fetch'):
                task.run(2017, 7)

            with mock.patch('pipeline.runner.Task.unimported_paths',
                            return_value=['/some/path']):
                for task in tasks.by_type('convert'):
                    task.run(2017, 7)

            with mock.patch('pipeline.runner.Task.unimported_paths',
                            return_value=['/some/path']):
                for task in tasks.by_type('import'):
                    task.run(2017, 7)

            for task in tasks.by_type('post_process'):
                task.run(2017, 7, last_imported='2017_01')
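Patching BaseCommand.execute stubs out the point at which every management command's handle() would run, so, assuming task.run goes through Django's call_command machinery, each command is still looked up and its arguments parsed, but nothing is executed. A minimal standalone sketch of the same idea, using Django's built-in check command as a stand-in:

    from unittest import mock

    from django.core.management import call_command

    with mock.patch('django.core.management.base.BaseCommand.execute') as execute:
        call_command('check')  # command found and its arguments parsed...
    assert execute.called      # ...but execute() itself never ran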
Example #3
    def test_load_real_tasks(self):
        # We're just checking that no exceptions get raised here
        path = os.path.join(settings.APPS_ROOT, 'pipeline', 'metadata')
        with override_settings(PIPELINE_METADATA_DIR=path):
            load_tasks()
Example #4
    def setUp(self):
        # Load tasks
        self.tasks = load_tasks()

        # Set up dummy files on filesystem
        for source_id, year_and_month, filename in [
            ['source_a', '2017_01', 'source_a.csv'],
            ['source_a', '2017_02', 'source_a.csv'],
            ['source_a', '2017_03', 'source_a.csv'],
            ['source_b', '2017_01', 'source_b_1701.csv'],
            ['source_b', '2017_02', 'source_b_1702.csv'],
            ['source_b', '2017_03', 'source_b_1703.csv'],
            ['source_c', '2017_01', 'source_c1.csv'],
            ['source_c', '2017_01', 'source_c2.csv'],
            ['source_c', '2017_02', 'source_c1.csv'],
            ['source_c', '2017_02', 'source_c2.csv'],
        ]:
            path = build_path(source_id, year_and_month, filename)
            dir_path = os.path.dirname(path)
            try:
                os.makedirs(dir_path)
            except OSError as e:
                import errno
                # Tolerate a directory that already exists; re-raise any
                # other error (including a pre-existing non-directory path)
                if e.errno != errno.EEXIST or not os.path.isdir(dir_path):
                    raise
            with open(path, 'w') as f:
                f.write('1,2,3\n')

        # Set up dummy log data
        log_data = {
            'source_a': [{
                'imported_file': build_path('source_a', '2017_01', 'source_a.csv'),
                'imported_at': '2017-01-01T12:00:00',
            }, {
                'imported_file': build_path('source_a', '2017_02', 'source_a.csv'),
                'imported_at': '2017-02-01T12:00:00',
            }],
            'source_b': [{
                'imported_file': build_path('source_b', '2017_01', 'source_b_1701.csv'),
                'imported_at': '2017-01-01T12:00:00',
            }, {
                'imported_file': build_path('source_b', '2017_02', 'source_b_1702.csv'),
                'imported_at': '2017-02-01T12:00:00',
            }],
            'source_c': [{
                'imported_file': build_path('source_c', '2017_01', 'source_c2.csv'),
                'imported_at': '2017-01-01T12:00:00',
            }, {
                'imported_file': build_path('source_c', '2017_02', 'source_c2.csv'),
                'imported_at': '2017-02-01T12:00:00',
            }],
        }

        with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
            json.dump(log_data, f)
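build_path is a project-local helper that these snippets never define. A hypothetical implementation consistent with how setUp uses it (the PIPELINE_DATA_BASEDIR setting name is invented for illustration):

    def build_path(source_id, year_and_month, filename):
        # Hypothetical: files live at <base>/<source_id>/<YYYY_MM>/<filename>,
        # matching the (source_id, year_and_month, filename) triples in setUp
        return os.path.join(settings.PIPELINE_DATA_BASEDIR,
                            source_id, year_and_month, filename)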