Example #1
def extract_alert_settings(env_setup):
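    # READ THE ALERT-EXTRACTION CONFIG, SET CONSTANTS, AND START LOGGING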
    settings = startup.read_settings(filename=extract_alerts.CONFIG_FILE,
                                     complain=False)
    settings.source.database.ssl = None  # NOT REQUIRED FOR TEST DATABASE
    constants.set(settings.constants)
    Log.start(settings.debug)
    return settings
Example #2
def main():
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

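            # EACH WORKER PULLS WORK FROM THE QUEUE, EXTRACTS INSIDE ONE TRANSACTION, AND RE-QUEUES FAILURES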
            def extract(please_stop):
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db,
                                                  please_stop=please_stop,
                                                  **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                extractor.queue.add(kwargs)

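            # START THE CONFIGURED NUMBER OF EXTRACTION THREADS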
            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop,
                                            allow_exit=True,
                                            wait_forever=False)
    except Exception as e:
        Log.warning("Problem with data extraction", e)
    finally:
        Log.stop()
Example #3
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[
                    u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
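                    # DEVICE LETTERS START AT 'b' (ASCII 98), AFTER THE EPHEMERAL VOLUMES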
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

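            # INDEX UTILITY CONFIGURATIONS BY instance_type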
            settings.utility = UniqueIndex(["instance_type"],
                                           data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests()

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
Example #4
def setUpClass(cls):
    try:
        cls.config = startup.read_settings(filename="tests/config.json")
        constants.set(cls.config.constants)
        Log.start(cls.config.debug)
    except Exception as e:
        Log.error("Problem with etl", e)
Example #5
def main():
    try:
        settings = startup.read_settings(
            defs=[{
                "name": ["--all", "-a"],
                "action": 'store_true',
                "help": 'process all mo-* subdirectories',
                "dest": "all",
                "required": False
            }, {
                "name": ["--dir", "--directory", "-d"],
                "help": 'directory to deploy',
                "type": str,
                "dest": "directory",
                "required": True,
                "default": "."
            }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.all:
            deploy_all(File(settings.args.directory), settings.prefix,
                       settings)
        else:
            Deploy(File(settings.args.directory), kwargs=settings).deploy()
    except Exception as e:
        Log.warning("Problem with etl", cause=e)
Example #6
def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        with SingleInstance(flavor_id=settings.args.filename):
            constants.set(settings.constants)
            settings.run_interval = Duration(settings.run_interval)
            for u in settings.utility:
                u.discount = coalesce(u.discount, 0)
                # MARKUP drives WITH EXPECTED device MAPPING
                num_ephemeral_volumes = ephemeral_storage[u.instance_type]["num"]
                for i, d in enumerate(d for d in u.drives if not d.device):
                    letter = convert.ascii2char(98 + num_ephemeral_volumes + i)
                    d.device = "/dev/xvd" + letter

            settings.utility = UniqueIndex(["instance_type"], data=settings.utility)
            instance_manager = new_instance(settings.instance)
            m = SpotManager(instance_manager, kwargs=settings)

            if ENABLE_SIDE_EFFECTS:
                m.update_spot_requests(instance_manager.required_utility())

            if m.watcher:
                m.watcher.join()
    except Exception as e:
        Log.warning("Problem with spot manager", cause=e)
    finally:
        Log.stop()
        MAIN_THREAD.stop()
Example #7
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
Example #8
def setup(settings=None):
    global config

    try:
        config = startup.read_settings(defs={
            "name": ["--process_num", "--process"],
            "help": "Additional port offset (for multiple Flask processes",
            "type": int,
            "dest": "process_num",
            "default": 0,
            "required": False
        },
                                       filename=settings)
        constants.set(config.constants)
        Log.start(config.debug)

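        # OFFSET THE FLASK PORT FOR ADDITIONAL PROCESSES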
        if config.args.process_num and config.flask.port:
            config.flask.port += config.args.process_num

        # PIPE REQUEST LOGS TO ES DEBUG
        if config.request_logs:
            request_logger = elasticsearch.Cluster(
                config.request_logs).get_or_create_index(config.request_logs)
            active_data.request_log_queue = request_logger.threaded_queue(
                max_size=2000)

        # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
        containers.config.default = {
            "type": "elasticsearch",
            "settings": config.elasticsearch.copy()
        }

        # TURN ON /exit FOR WINDOWS DEBUGGING
        if config.flask.debug or config.flask.allow_exit:
            config.flask.allow_exit = None
            Log.warning("ActiveData is in debug mode")
            app.add_url_rule('/exit', 'exit', _exit)

        # TRIGGER FIRST INSTANCE
        FromESMetadata(config.elasticsearch)
        if config.saved_queries:
            setattr(save_query, "query_finder",
                    SaveQueries(config.saved_queries))
        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        if config.flask.ssl_context:
            if config.args.process_num:
                Log.error(
                    "can not serve ssl and multiple Flask instances at once")
            setup_ssl()

        return app
    except Exception as e:
        Log.error(
            "Serious problem with ActiveData service construction!  Shutdown!",
            cause=e)
Example #9
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        ETL(settings).setup(settings.instance, settings.utility)
    except Exception as e:
        Log.warning("Problem with setup of ETL", cause=e)
    finally:
        Log.stop()
Example #10
def start():
    try:
        config = json2value(STDIN.readline().decode('utf8'))
        constants.set(config.constants)
        Log.start(set_default(config.debug, {"logs": [{"type": "raw"}]}))
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
Example #11
def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)
        ETL(settings).setup(settings.instance, settings.utility)
    except Exception as e:
        Log.warning("Problem with setup of ETL", cause=e)
    finally:
        Log.stop()
Example #12
    def setUpClass(cls):
        global config, broker
        try:
            config = startup.read_settings(filename="tests/config/file.json")
            constants.set(config.constants)
            Log.start(config.debug)

            File(config.broker.backing.directory).delete()
            broker = Broker(kwargs=config.broker)
        except Exception as e:
            Log.error("could not setup for testing", cause=e)
Example #13
def main():
    global config
    global hg
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        hg = HgMozillaOrg(config)
        Log.start(config.debug)

    except Exception as e:
        Log.error("Problem with etl", e)
Example #14
def main():
    settings = startup.read_settings()
    Log.start(settings.debug)
    constants.set(settings.constants)

    try:
        _synch(settings)
    except Exception as e:
        Log.error("Problem with synch", e)
    finally:
        Log.stop()
Example #15
    def run(self, force=False, restart=False, start=None, merge=False):
        try:
            # SETUP LOGGING
            settings = startup.read_settings(filename=CONFIG_FILE)
            constants.set(settings.constants)
            Log.start(settings.debug)

            self.extract(settings, force, restart, start, merge)
        except Exception as e:
            Log.error("could not extract jobs", cause=e)
        finally:
            Log.stop()
Example #16
def start():
    try:
        line = STDIN.readline().decode("utf8")
        config = json2value(line)
        constants.set(config.constants)
        Log.start(config.debug)
        Log.set_logger(RawLogger())
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
Example #17
def main():
    global config
    global hg
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        hg = HgMozillaOrg(config)
        random = _parse_diff(
            Data(changeset={"id": "2d9d0bebb5c6"},
                 branch={"url": "https://hg.mozilla.org/mozilla-central"}))
    except Exception as e:
        Log.error("Problem with etl", e)
Example #18
def setUpClass(cls):
    Log.start(settings.debug)
    with Timer("setup database"):
        try:
            with MySQL(schema=None, kwargs=settings.database) as db:
                db.query("drop database testing")
        except Exception as e:
            if "Can't drop database " in e:
                pass
            else:
                Log.warning("problem removing db", cause=e)
        MySQL.execute_file("tests/resources/database.sql",
                           schema=None,
                           kwargs=settings.database)
Example #19
def main():
    try:
        settings = wrap({
            "elasticsearch": {
                "host": "http://activedata.allizom.org",
                "port": 9200,
                "debug": True
            }
        })

        Log.start(settings)
        move_shards(settings)
    except Exception as e:
        Log.error("Problem with assign of shards", e)
Example #20
def main():

    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

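        # PULL THE BRANCH LIST FROM hg AND PUSH IT TO ELASTICSEARCH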
        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()
Example #21
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        inject_secrets(config)

        with Timer("PATCH ADR: dd update() method to Configuration class"):

            def update(self, config):
                """
                Update the configuration object with new parameters
                :param config: dict of configuration
                """
                for k, v in config.items():
                    if v != None:
                        self._config[k] = v

                self._config["sources"] = sorted(
                    map(os.path.expanduser, set(self._config["sources"]))
                )

                # Use the NullStore by default. This allows us to control whether
                # caching is enabled or not at runtime.
                self._config["cache"].setdefault("stores", {"null": {"driver": "null"}})
                object.__setattr__(self, "cache", CacheManager(self._config["cache"]))
                self.cache.extend("null", lambda driver: NullStore())

            setattr(Configuration, "update", update)

        # UPDATE ADR CONFIGURATION
        adr.config.update(config.adr)

        Log.start(config.debug)

        # SHUNT ADR LOGGING TO MAIN LOGGING
        # https://loguru.readthedocs.io/en/stable/api/logger.html#loguru._logger.Logger.add
        loguru.logger.remove()
        loguru.logger.add(
            _logging, level="DEBUG", format="{message}", filter=lambda r: True,
        )

        Schedulers(config).process()
    except Exception as e:
        Log.warning("Problem with etl! Shutting down.", cause=e)
    finally:
        Log.stop()
Example #22
def main():

    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()
Example #23
def extract_job_settings():
    # These values are not directly accessed during testing, but the code requires that they be present.
    os.environ["NEW_RELIC_APP_NAME"] = "testing"
    os.environ["BIGQUERY_PRIVATE_KEY_ID"] = "1"
    os.environ["BIGQUERY_PRIVATE_KEY"] = "1"

    # USE THE TEST SCHEMA
    db_url = os.environ["DATABASE_URL"]
    db_url = db_url.replace(strings.between(db_url, "/", None),
                            DATABASES["default"]["TEST"]["NAME"])
    os.environ["DATABASE_URL"] = db_url

    settings = startup.read_settings(filename=extract_jobs.CONFIG_FILE,
                                     complain=False)
    settings.source.database.ssl = None  # NOT REQUIRED FOR TEST DATABASE
    constants.set(settings.constants)
    Log.start(settings.debug)
    return settings
Example #24
def setup():
    global config

    config = startup.read_settings(
        filename=os.environ.get('ACTIVEDATA_CONFIG'),
        defs=[
            {
                "name": ["--process_num", "--process"],
                "help": "Additional port offset (for multiple Flask processes",
                "type": int,
                "dest": "process_num",
                "default": 0,
                "required": False
            }
        ]
    )

    constants.set(config.constants)
    Log.start(config.debug)

    # PIPE REQUEST LOGS TO ES DEBUG
    if config.request_logs:
        cluster = elasticsearch.Cluster(config.request_logs)
        request_logger = cluster.get_or_create_index(config.request_logs)
        active_data.request_log_queue = request_logger.threaded_queue(max_size=2000)

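    # WIRE UP DOCKERFLOW, WITH AN ELASTICSEARCH BACKEND CHECK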
    if config.dockerflow:
        def backend_check():
            http.get_json(config.elasticsearch.host + ":" + text_type(config.elasticsearch.port))
        dockerflow(flask_app, backend_check)


    # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
    container.config.default = {
        "type": "elasticsearch",
        "settings": config.elasticsearch.copy()
    }

    # TRIGGER FIRST INSTANCE
    if config.saved_queries:
        setattr(save_query, "query_finder", SaveQueries(config.saved_queries))

    HeaderRewriterFix(flask_app, remove_headers=['Date', 'Server'])
Example #25
def main(num):
    try:
        Log.start()
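        # COMPARE THE SPEED OF SEVERAL JSON ENCODERS ON THE SAME WORKLOAD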
        results = []
        test_json(results, "mo-json encoder", json_encoder, num)
        test_json(results, "mo-json encoder (again)", json_encoder, num)
        test_json(results, "scrub before json.dumps",
                  cPythonJSONEncoder().encode, num)
        test_json(results, "override JSONEncoder.default()",
                  EnhancedJSONEncoder().encode, num)
        test_json(results, "default json.dumps", json.dumps,
                  num)  # WILL CRASH, CAN NOT HANDLE DIVERSITY OF TYPES
        test_json(results, "typed json", typed_encoder.encode, num)

        # test_json(results, "scrubbed ujson", ujson.dumps, num)  # THIS PLAIN CRASHES

        Log.note(u"\n{{summary}}", summary=convert.list2tab(results))
    finally:
        Log.stop()
Example #26
def main():
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

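        # DUMP ALL SAVED QUERIES FROM ELASTICSEARCH TO THE BACKUP FILE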
        sq = elasticsearch.Index(kwargs=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        File(config.args.file).write("".join(
            map(convert.json2value, result.hits.hits)))

    except Exception as e:
        Log.error("Problem with etl", e)
Example #27
def main():

    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        hg = HgMozillaOrg(settings)
        todo = Queue()
        todo.add("97160a734959")
        least = 100000

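        # WALK PARENT REVISIONS, TRACKING THE ONE WITH THE FEWEST CHANGES PER FILE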
        while todo:
            next_ = todo.pop()
            curr = hg.get_revision(
                wrap({
                    "changeset": {
                        "id": next_
                    },
                    "branch": {
                        "name": BRANCH
                    }
                }))
            if len(curr.changeset.files) > MIN_FILES:
                diff = hg._get_json_diff_from_hg(curr)
                num_changes = sum(len(d.changes) for d in diff)
                score = num_changes / len(diff)
                if score < least:
                    least = score
                    Log.note(
                        "smallest = {{rev}}, num_lines={{num}}, num_files={{files}}",
                        rev=curr.changeset.id,
                        num=num_changes,
                        files=len(diff))
            todo.extend(listwrap(curr.parents))

    except Exception as e:
        Log.error("Problem with scna", e)
    finally:
        Log.stop()
Example #28
        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}",
                     wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request " + text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()


if __name__ == '__main__':
    try:
        tmp_signal = Signal()
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

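        # RUN THE CONSUMER ONCE DIRECTLY, THEN AGAIN ON ITS OWN WORKER THREAD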
        queue_consumer(kwargs=config, please_stop=tmp_signal)
        worker = Thread.run("sqs consumer", queue_consumer, kwargs=config)
        MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True,
                                             please_stop=worker.stopped)
    except BaseException as e:
        Log.error("Serious problem with consumer construction! Shutdown!",
                  cause=e)
Example #29
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()


num_temps = 0


def temp_var():
    global num_temps
    try:
        return "temp_var" + text_type(num_temps)
    finally:
        num_temps += 1


if __name__ == "__main__":
    try:
        config = json2value(sys.stdin.readline().decode('utf8'))
        constants.set(config.constants)
        Log.start(set_default(config.debug, {"logs": [{"type": "raw"}]}))
        command_loop({"config": config})
    except Exception as e:
        Log.error("problem staring worker", cause=e)
    finally:
        Log.stop()
Example #30
def setUpClass(cls):
    Log.start({"trace": False})
Example #31
from mo_dots import to_data, from_data, listwrap, is_many
from mo_files import mimetype
from mo_future import first
from mo_http import http
from mo_logs import Log
from mo_math import is_nan
from mo_times import Date, YEAR, WEEK, MONTH
from pandas import DataFrame

from utils import nice_ceiling

# PROVINCE = 7  # Ontario
# PROVINCE = 10  # Alberta
PROVINCE = 11  # British Columbia

Log.start(trace=True)

http.DEBUG = True
http.default_headers = to_data({
    "From": "*****@*****.**",
    "Referer": "https://github.com/klahnakoski/mo-statcan",
    "User-Agent": "mo-statscan",
    "Accept": mimetype.ANY,
})

# LESS DETAILED CAUSES
CAUSE_OF_DEATH = (
    13_10_0394  # https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1310039401
)

# DETAILED CAUSES
Example #32
            Log.note("Skipping try revision.")
            queue.commit()
            continue

        now = Date.now().unix
        if time_offset is None:
            time_offset = now - request.meta.request_time

        next_request = request.meta.request_time + time_offset
        if next_request > now:
            Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
            Till(till=next_request).wait()

        Thread.run("request "+text_type(request_count), one_request, request)
        request_count += 1
        queue.commit()


if __name__ == '__main__':
    try:
        tmp_signal = Signal()
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        queue_consumer(kwargs=config, please_stop=tmp_signal)
        worker = Thread.run("sqs consumer", queue_consumer, kwargs=config)
        MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped)
    except BaseException as e:
        Log.error("Serious problem with consumer construction! Shutdown!", cause=e)
Example #33
def setUp(self):
    Log.start({"trace": True})
Example #34
def setUpClass(cls):
    Log.start({"trace": True, "cprofile": False})
Example #35
    def run(self, force=False, restart=False, merge=False):
        # SETUP LOGGING
        settings = startup.read_settings(filename=CONFIG_FILE)
        constants.set(settings.constants)
        Log.start(settings.debug)

        if not settings.extractor.app_name:
            Log.error("Expecting an extractor.app_name in config file")

        # SETUP DESTINATION
        destination = bigquery.Dataset(
            dataset=settings.extractor.app_name, kwargs=settings.destination
        ).get_or_create_table(settings.destination)

        try:
            if merge:
                with Timer("merge shards"):
                    destination.merge_shards()

            # RECOVER LAST SQL STATE
            redis = Redis()
            state = redis.get(settings.extractor.key)

            if restart or not state:
                state = (0, 0)
                redis.set(settings.extractor.key, value2json(state).encode("utf8"))
            else:
                state = json2value(state.decode("utf8"))

            last_modified, job_id = state

            # SCAN SCHEMA, GENERATE EXTRACTION SQL
            extractor = MySqlSnowflakeExtractor(settings.source)
            canonical_sql = extractor.get_sql(SQL("SELECT 0"))

            # ENSURE SCHEMA HAS NOT CHANGED SINCE LAST RUN
            old_sql = redis.get(settings.extractor.sql)
            if old_sql and old_sql.decode("utf8") != canonical_sql.sql:
                if force:
                    Log.warning("Schema has changed")
                else:
                    Log.error("Schema has changed")
            redis.set(settings.extractor.sql, canonical_sql.sql.encode("utf8"))

            # SETUP SOURCE
            source = MySQL(settings.source.database)

            while True:
                Log.note(
                    "Extracting jobs for last_modified={{last_modified|datetime|quote}}, job.id={{job_id}}",
                    last_modified=last_modified,
                    job_id=job_id,
                )

                # Example: job.id ==283890114
                # get_ids = ConcatSQL(
                #     (SQL_SELECT, sql_alias(quote_value(283890114), "id"))
                # )
                # get_ids = sql_query(
                #     {
                #         "from": "job",
                #         "select": ["id"],
                #         "where": {
                #             "or": [
                #                 {"gt": {"last_modified": parse(last_modified)}},
                #                 {
                #                     "and": [
                #                         {"eq": {"last_modified": parse(last_modified)}},
                #                         {"gt": {"id": job_id}},
                #                     ]
                #                 },
                #             ]
                #         },
                #         "sort": ["last_modified", "id"],
                #         "limit": settings.extractor.chunk_size,
                #     }
                # )

                get_ids = SQL(str(
                    (
                        Job.objects.filter(
                            Q(last_modified__gt=parse(last_modified).datetime)
                            | (
                                Q(last_modified=parse(last_modified).datetime)
                                & Q(id__gt=job_id)
                            )
                        )
                        .annotate()
                        .values("id")
                        .order_by("last_modified", "id")[
                            : settings.extractor.chunk_size
                        ]
                    ).query
                ))

                sql = extractor.get_sql(get_ids)

                # PULL FROM source, AND PUSH TO destination
                acc = []
                with source.transaction():
                    cursor = source.query(sql, stream=True, row_tuples=True)
                    extractor.construct_docs(cursor, acc.append, False)
                if not acc:
                    break
                destination.extend(acc)

                # RECORD THE STATE
                last_doc = acc[-1]
                last_modified, job_id = last_doc.last_modified, last_doc.id
                redis.set(
                    settings.extractor.key,
                    value2json((last_modified, job_id)).encode("utf8"),
                )

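                # A PARTIAL BATCH MEANS THERE ARE NO MORE JOBS TO EXTRACT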
                if len(acc) < settings.extractor.chunk_size:
                    break

        except Exception as e:
            Log.warning("problem with extraction", cause=e)

        Log.note("done job extraction")

        try:
            with Timer("merge shards"):
                destination.merge_shards()
        except Exception as e:
            Log.warning("problem with merge", cause=e)

        Log.note("done job merge")