Example No. 1
def mspass_func_wrapper_multi(func,
                              data1,
                              data2,
                              *args,
                              preserve_history=False,
                              instance=None,
                              dryrun=False,
                              **kwargs):
    """
    This wrapper provides the same functionality as mspass_func_wrapper, with a few differences. First, this
    wrapper accepts two mspasspy data objects as input. Second, inplace_return is not implemented here. The same
    processing history and error-log entries are duplicated and stored in both inputs.

    :param func: target function
    :param data1: first input datum; only mspasspy data objects (TimeSeries, Seismogram, or Ensemble) are accepted.
    :param data2: second input datum; only mspasspy data objects (TimeSeries, Seismogram, or Ensemble) are accepted.
    :param args: extra arguments
    :param preserve_history: True to preserve the processing history in the data objects, False otherwise. preserve_history
     and instance are intimately related and control how object-level history is handled.
     Object-level history is disabled by default for efficiency.  If preserve_history is set True and the string passed
     as instance is defined (not None, which is the default), each Seismogram or TimeSeries object will attempt to
     save the history through a new_map operation.  If the history chain is empty this will silently generate
     an error posted to the error log of each object.
    :param instance: instance is a unique id to record the usage of func while preserving the history.
    :type instance: str
    :param dryrun: True for a dry run: the algorithm is not executed, but the arguments used in this wrapper are checked.
      This is useful for pre-run checks of a large job to validate a workflow. Errors generate exceptions,
      but the function returns before attempting any calculations.
    :param kwargs: extra keyword arguments
    :return: the output of func
    """
    if not isinstance(
            data1,
        (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper_multi only accepts mspass object as data input"
        )

    if not isinstance(
            data2,
        (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper_multi only accepts mspass object as data input"
        )

    algname = func.__name__

    if preserve_history and instance is None:
        raise ValueError(
            algname + ": preserve_history was true but instance not defined")
    if dryrun:
        return "OK"

    try:
        res = func(data1, data2, *args, **kwargs)
        if preserve_history:
            logging_helper.info(data1, algname, instance)
            logging_helper.info(data2, algname, instance)
        return res
    except RuntimeError as err:
        if isinstance(data1, (Seismogram, TimeSeries)):
            data1.elog.log_error(algname, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data1, algname, err,
                                          ErrorSeverity.Invalid)
        if isinstance(data2, (Seismogram, TimeSeries)):
            data2.elog.log_error(algname, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data2, algname, err,
                                          ErrorSeverity.Invalid)
    except MsPASSError as ex:
        if ex.severity == ErrorSeverity.Fatal:
            raise
        if isinstance(data1, (Seismogram, TimeSeries)):
            data1.elog.log_error(algname, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data1, algname, ex.message,
                                          ex.severity)
        if isinstance(data2, (Seismogram, TimeSeries)):
            data2.elog.log_error(algname, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data2, algname, ex.message,
                                          ex.severity)
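
Below is a minimal usage sketch (not part of the original source) of calling the wrapper above directly. The helper sample_dot is hypothetical, and ts_a and ts_b are assumed to be TimeSeries objects created elsewhere.

def sample_dot(d1, d2):
    """Return the dot product of the overlapping samples of two TimeSeries."""
    n = min(d1.npts, d2.npts)
    return sum(d1.data[i] * d2.data[i] for i in range(n))

def compare(ts_a, ts_b):
    # preserve_history=True writes a history entry to both inputs;
    # the instance string is an arbitrary id chosen by the caller
    return mspass_func_wrapper_multi(sample_dot, ts_a, ts_b,
                                     preserve_history=True,
                                     instance="sample_dot-0001")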
Example No. 2
    def test_main(self):
        self.db["wf_TimeSeries"].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, "1", "deepcopy")
        logging_helper.info(ts2, "1", "deepcopy")
        logging_helper.info(ts3, "1", "deepcopy")

        # store values with the wrong type (to be fixed by dbclean -ft)
        ts1["npts"] = "123"
        ts2["delta"] = "3"
        ts3["npts"] = "xyz"

        save_res_code = self.db.save_data(ts1,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])
        save_res_code = self.db.save_data(ts2,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])
        save_res_code = self.db.save_data(ts3,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])

        # running dbclean with no fix options should exit with an error
        with pytest.raises(SystemExit) as e:
            dbclean.main(["test_dbclean", "wf_TimeSeries"])
        assert e.type == SystemExit
        assert e.value.code == -1

        # delete starttime attribute
        # rename calib to rename_calib
        dbclean.main([
            "test_dbclean",
            "wf_TimeSeries",
            "-ft",
            "-d",
            "starttime",
            "-r",
            "calib:rename_calib",
        ])

        res1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
        res2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
        res3 = self.db["wf_TimeSeries"].find_one({"_id": ts3["_id"]})

        assert res1["npts"] == 123
        assert "starttime" not in res1
        assert "calib" not in res1
        assert "rename_calib" in res1

        assert res2["delta"] == 3.0
        assert "starttime" not in res2
        assert "calib" not in res2
        assert "rename_calib" in res2

        # can't be fixed
        assert res3["npts"] == "xyz"
        assert "starttime" not in res3
        assert "calib" not in res3
        assert "rename_calib" in res3

        self.db["wf_TimeSeries"].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, "1", "deepcopy")
        logging_helper.info(ts2, "1", "deepcopy")
        logging_helper.info(ts3, "1", "deepcopy")

        # store values with the wrong type (to be fixed by dbclean -ft)
        ts1["npts"] = "123"
        ts2["delta"] = "3"
        ts3["npts"] = "xyz"

        save_res_code = self.db.save_data(ts1,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])
        save_res_code = self.db.save_data(ts2,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])
        save_res_code = self.db.save_data(ts3,
                                          mode="promiscuous",
                                          storage_mode="gridfs",
                                          exclude_keys=["extra2"])

        # only fix types
        dbclean.main(["test_dbclean", "wf_TimeSeries", "-ft"])
        # re-query the re-saved documents so these assertions test this run
        res1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
        res2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
        res3 = self.db["wf_TimeSeries"].find_one({"_id": ts3["_id"]})

        assert res1["npts"] == 123
        assert res2["delta"] == 3.0
        # can't be fixed
        assert res3["npts"] == "xyz"
Example No. 3
def mspass_func_wrapper(func,
                        data,
                        *args,
                        preserve_history=False,
                        instance=None,
                        dryrun=False,
                        inplace_return=False,
                        **kwargs):
    """
    This function serves as a decorator wrapper that is used widely in the mspasspy library. It executes the target
    function on the input data, which is restricted to mspasspy objects, and it preserves the processing history and
    error logs in those objects, so wrapping your function with this decorator saves you that boilerplate.
    RuntimeError exceptions are not re-raised, to keep map-reduce operations efficient. A MsPASSError with severity
    Fatal is re-raised; all other errors are only logged.

    :param func: target function
    :param data: input data; only mspasspy data objects (TimeSeries, Seismogram, or Ensemble) are accepted.
    :param args: extra arguments
    :param preserve_history: True to preserve the processing history in the data object, False otherwise. preserve_history
     and instance are intimately related and control how object-level history is handled.
     Object-level history is disabled by default for efficiency.  If preserve_history is set True and the string passed
     as instance is defined (not None, which is the default), each Seismogram or TimeSeries object will attempt to
     save the history through a new_map operation.  If the history chain is empty this will silently generate
     an error posted to the error log of each object.
    :param instance: instance is a unique id to record the usage of func while preserving the history.
    :type instance: str
    :param dryrun: True for a dry run: the algorithm is not executed, but the arguments used in this wrapper are checked.
      This is useful for pre-run checks of a large job to validate a workflow. Errors generate exceptions,
      but the function returns before attempting any calculations.
    :param inplace_return: set True when func operates in place and returns nothing, but you want the original
     data returned (for example, in a map-reduce workflow).
    :param kwargs: extra keyword arguments
    :return: the original data or the output of func
    """
    if not isinstance(
            data,
        (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper only accepts mspass object as data input")

    algname = func.__name__

    if preserve_history and instance is None:
        raise ValueError(
            algname + ": preserve_history was true but instance not defined")
    if dryrun:
        return "OK"

    try:
        res = func(data, *args, **kwargs)
        if preserve_history:
            logging_helper.info(data, algname, instance)
        if res is None and inplace_return:
            return data
        return res
    except RuntimeError as err:
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(algname, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data, algname, err,
                                          ErrorSeverity.Invalid)
    except MsPASSError as ex:
        if ex.severity == ErrorSeverity.Fatal:
            raise
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(algname, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data, algname, ex.message,
                                          ex.severity)
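
A minimal usage sketch (not part of the original source): demean is a hypothetical in-place helper and ts is assumed to be an existing TimeSeries. Because demean returns None, inplace_return=True makes the wrapper hand back the modified input, which is what a map operator needs.

def demean(d):
    """Subtract the mean from the samples of a TimeSeries in place."""
    if d.npts > 0:
        m = sum(d.data) / d.npts
        for i in range(d.npts):
            d.data[i] -= m

def run_demean(ts):
    # with inplace_return=True the wrapper returns ts itself since demean returns None
    return mspass_func_wrapper(demean, ts,
                               preserve_history=True,
                               instance="demean-0001",
                               inplace_return=True)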
Example No. 4
    def test_main(self):
        self.db['wf_TimeSeries'].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, 'deepcopy', '1')
        logging_helper.info(ts2, 'deepcopy', '1')
        logging_helper.info(ts3, 'deepcopy', '1')

        # store values with the wrong type (to be fixed by dbclean -ft)
        ts1['npts'] = '123'
        ts2['delta'] = '3'
        ts3['npts'] = 'xyz'

        save_res_code = self.db.save_data(ts1,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])
        save_res_code = self.db.save_data(ts2,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])
        save_res_code = self.db.save_data(ts3,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])

        # running dbclean with no fix options should exit with an error
        with pytest.raises(SystemExit) as e:
            dbclean.main(['test_dbclean', 'wf_TimeSeries'])
        assert e.type == SystemExit
        assert e.value.code == -1

        # delete starttime attribute
        # rename calib to rename_calib
        dbclean.main([
            'test_dbclean', 'wf_TimeSeries', '-ft', '-d', 'starttime', '-r',
            'calib:rename_calib'
        ])

        res1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
        res2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
        res3 = self.db['wf_TimeSeries'].find_one({'_id': ts3['_id']})

        assert res1['npts'] == 123
        assert 'starttime' not in res1
        assert 'calib' not in res1
        assert 'rename_calib' in res1

        assert res2['delta'] == 3.0
        assert 'starttime' not in res2
        assert 'calib' not in res2
        assert 'rename_calib' in res2

        # can't be fixed
        assert res3['npts'] == 'xyz'
        assert 'starttime' not in res3
        assert 'calib' not in res3
        assert 'rename_calib' in res3

        self.db['wf_TimeSeries'].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, 'deepcopy', '1')
        logging_helper.info(ts2, 'deepcopy', '1')
        logging_helper.info(ts3, 'deepcopy', '1')

        # store values with the wrong type (to be fixed by dbclean -ft)
        ts1['npts'] = '123'
        ts2['delta'] = '3'
        ts3['npts'] = 'xyz'

        save_res_code = self.db.save_data(ts1,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])
        save_res_code = self.db.save_data(ts2,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])
        save_res_code = self.db.save_data(ts3,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['extra2'])

        # only fix types
        dbclean.main(['test_dbclean', 'wf_TimeSeries', '-ft'])
        # re-query the re-saved documents so these assertions test this run
        res1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
        res2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
        res3 = self.db['wf_TimeSeries'].find_one({'_id': ts3['_id']})

        assert res1['npts'] == 123
        assert res2['delta'] == 3.0
        # can't be fixed
        assert res3['npts'] == 'xyz'
Example No. 5
    def test_main(self, capfd):
        self.db['wf_TimeSeries'].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, 'deepcopy', '1')
        logging_helper.info(ts2, 'deepcopy', '1')
        logging_helper.info(ts3, 'deepcopy', '1')

        # store values with the wrong type to create inconsistencies for dbverify to report
        ts1['npts'] = '123'
        ts1['extra1'] = 'extra1'
        ts2['delta'] = '3'
        ts2['extra2'] = 'extra2'
        ts3['npts'] = 'xyz'
        ts3['extra2'] = 'extra2'
        # break normalization: give ts1 a bogus site_id and remove source_id from ts2
        ts1['site_id'] = ObjectId()
        ts2.erase('source_id')

        save_res_code = self.db.save_data(ts1,
                                          mode='promiscuous',
                                          storage_mode='gridfs')
        save_res_code = self.db.save_data(ts2,
                                          mode='promiscuous',
                                          storage_mode='gridfs')
        # drop a required attribute (starttime) when saving ts3
        save_res_code = self.db.save_data(ts3,
                                          mode='promiscuous',
                                          storage_mode='gridfs',
                                          exclude_keys=['starttime'])
        doc1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
        doc2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
        doc3 = self.db['wf_TimeSeries'].find_one({'_id': ts3['_id']})
        doc1_str = json_util.dumps(doc1, indent=2)
        doc2_str = json_util.dumps(doc2, indent=2)
        doc3_str = json_util.dumps(doc3, indent=2)

        # default normalization test
        dbverify.main(['test_dbverify', '-t', 'normalization'])
        out, err = capfd.readouterr()
        assert out == "normalization test on normalized key= site_id  found problems\nFound broken links in  1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"

        # more than 1 collection to test
        dbverify.main([
            'test_dbverify', '-t', 'normalization', '-c', 'wf_TimeSeries',
            'site'
        ])
        out, err = capfd.readouterr()
        assert out == "WARNING:  normalization test can only be run on one collection at a time\nParsed a list with the following contents:   ['wf_TimeSeries', 'site']\nRunning test on the first item in that list\nnormalization test on normalized key= site_id  found problems\nFound broken links in  1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"

        # verbose mode
        dbverify.main(['test_dbverify', '-t', 'normalization', '-v'])
        out, err = capfd.readouterr()
        assert out == "check_link found the following docs in  wf_TimeSeries  with broken links to  site_id\n////////////////Doc number  1  with error///////////////\n" + doc1_str + "\n////////////////////////////////////////////////////////\ncheck_links found no undefined linking key to normalized key= site_id\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no undefined linking key to normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\ncheck_link found the following docs in  wf_TimeSeries  with undefined link keys to  source_id\n////////////////Doc number  1  with error///////////////\n" + doc2_str + "\n////////////////////////////////////////////////////////\n"

        # default required test
        dbverify.main(['test_dbverify', '-t', 'required'])
        out, err = capfd.readouterr()
        mmkeys = {'npts': 2, 'delta': 1}
        mm_keys_str = json_util.dumps(mmkeys, indent=2)
        undef_keys = {'starttime': 1}
        undef_keys_str = json_util.dumps(undef_keys, indent=2)
        assert out == "////Results from run_check_required on collection= wf_TimeSeries\nCollection found  3  documents with type inconsistencies\nOffending keys and number found follow:\n" + mm_keys_str + "\nCollection found  1  documents with required keys that were not defined\nOffending keys and number found follow:\n" + undef_keys_str + "\n"

        # default schema_check test
        dbverify.main(['test_dbverify', '-t', 'schema_check'])
        out, err = capfd.readouterr()
        mmkeys = {'npts': 2, 'delta': 1}
        mm_keys_str = json_util.dumps(mmkeys, indent=2)
        undef_keys = {'extra1': 1, 'extra2': 2}
        undef_keys_str = json_util.dumps(undef_keys, indent=2)
        assert out == "check_attribute_types result for collection= wf_TimeSeries\nCollection found  3  documents with type inconsistencies\nOffending keys and number found follow:\n" + mm_keys_str + "\nCollection found  3  documents with keys not defined in the schema\nOffending keys and number found follow:\n" + undef_keys_str + "\n"
Example No. 6
def mspass_func_wrapper(func,
                        data,
                        *args,
                        object_history=False,
                        alg_id=None,
                        alg_name=None,
                        dryrun=False,
                        inplace_return=False,
                        function_return_key=None,
                        **kwargs):
    """
    Decorator wrapper to adapt a simple function to the mspass parallel processing framework.

    This function serves as a decorator wrapper that is used widely in the mspasspy library. It executes the target
    function on the input data, which is restricted to mspasspy objects, and it preserves the processing history and
    error logs in those objects, so wrapping your function with this decorator saves you that boilerplate.
    RuntimeError exceptions are not re-raised, to keep map-reduce operations efficient. A MsPASSError with severity
    Fatal is re-raised; all other errors are only logged.

    :param func: target function
    :param data: input data; only mspasspy data objects (TimeSeries, Seismogram, or Ensemble) are accepted.
    :param args: extra arguments
    :param object_history: True to preserve the processing history in the data object, False otherwise. object_history
     and alg_id are intimately related and control how object-level history is handled.
     Object-level history is disabled by default for efficiency.  If object_history is set True and the value passed
     as alg_id is defined (not None, which is the default), each Seismogram or TimeSeries object will attempt to
     save the history through a new_map operation.  If the history chain is empty this will silently generate
     an error posted to the error log of each object.
    :param alg_id: alg_id is a unique id to record the usage of func while preserving the history.
    :type alg_id: :class:`bson.objectid.ObjectId`
    :param alg_name: alg_name is the name of func to record when preserving the history.
    :type alg_name: :class:`str`
    :param dryrun: True for a dry run: the algorithm is not executed, but the arguments used in this wrapper are checked.
      This is useful for pre-run checks of a large job to validate a workflow. Errors generate exceptions,
      but the function returns before attempting any calculations.
    :param inplace_return: set True when func operates in place and returns nothing, but you want the original
     data returned (for example, in a map-reduce workflow).
    :param function_return_key:  Some functions one might want to wrap with this decorator
     return something that is appropriate to save as Metadata.  If so, use this argument to
     define the key used to set that field in the data that is returned.
     This feature should normally be considered as a way to wrap an existing
     algorithm that you do not wish to alter, but which returns something useful.
     In principle that return can be almost anything, but we recommend this feature
     be limited to simple types (e.g. int, float).  The decorator makes
     no type checks so the caller is responsible for assuring what is posted will not cause
     downstream problems.  The default for this parameter is None, which
     is taken to mean any return of the wrapped function will be ignored.  Note
     that when function_return_key is anything but None, it is assumed the
     returned object is the (usually modified) data object.
    :param kwargs: extra keyword arguments
    :return: the original data or the output of func
    """
    if not isinstance(
            data,
        (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper only accepts mspass object as data input")

    # default to the wrapped function's name if alg_name was not given
    if not alg_name:
        alg_name = func.__name__

    if object_history and alg_id is None:
        raise ValueError(alg_name +
                         ": object_history was true but alg_id not defined")

    if dryrun:
        return "OK"

    if is_input_dead(data):
        return data

    try:
        res = func(data, *args, **kwargs)
        if object_history:
            logging_helper.info(data, alg_id, alg_name)
        if function_return_key is not None:
            if isinstance(function_return_key, str):
                data[function_return_key] = res
            else:
                data.elog.log_error(
                    alg_name,
                    "Illegal type received for function_return_key argument=" +
                    str(type(function_return_key)) +
                    "\nReturn value not saved in Metadata",
                    ErrorSeverity.Complaint,
                )
            if not inplace_return:
                data.elog.log_error(
                    alg_name,
                    "Inconsistent arguments; inplace_return was set False and function_return_key was not None.\nAssuming inplace_return == True is correct",
                    ErrorSeverity.Complaint,
                )
            return data
        elif inplace_return:
            return data
        else:
            return res
    except RuntimeError as err:
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(alg_name, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data, alg_name, err,
                                          ErrorSeverity.Invalid)
        # an unexpected error occurred; if inplace_return is True, still return the original data
        if inplace_return:
            return data
    except MsPASSError as ex:
        if ex.severity == ErrorSeverity.Fatal:
            raise
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(alg_name, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data, alg_name, ex.message,
                                          ex.severity)
        # an unexpected error occurred; if inplace_return is True, still return the original data
        if inplace_return:
            return data
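
A minimal usage sketch (not part of the original source) of the newer keyword arguments: rms is a hypothetical helper, ts an assumed TimeSeries, and the ObjectId passed as alg_id an arbitrary value. Because function_return_key is set, the scalar returned by rms is posted to the data's Metadata under "rms" and the wrapper returns the data object itself.

from bson.objectid import ObjectId

def rms(d):
    """Return the RMS amplitude of a TimeSeries."""
    if d.npts == 0:
        return 0.0
    return (sum(x * x for x in d.data) / d.npts) ** 0.5

def post_rms(ts):
    # object_history requires alg_id to be defined; function_return_key="rms"
    # stores the return value in ts and the wrapper then returns ts
    return mspass_func_wrapper(rms, ts,
                               object_history=True,
                               alg_id=ObjectId(),
                               alg_name="rms",
                               function_return_key="rms",
                               inplace_return=True)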
Example No. 7
    def test_main(self, capfd):
        self.db["wf_TimeSeries"].delete_many({})
        ts1 = copy.deepcopy(self.test_ts)
        ts2 = copy.deepcopy(self.test_ts)
        ts3 = copy.deepcopy(self.test_ts)
        logging_helper.info(ts1, "1", "deepcopy")
        logging_helper.info(ts2, "1", "deepcopy")
        logging_helper.info(ts3, "1", "deepcopy")

        # store values with the wrong type to create inconsistencies for dbverify to report
        ts1["npts"] = "123"
        ts1["extra1"] = "extra1"
        ts2["delta"] = "3"
        ts2["extra2"] = "extra2"
        ts3["npts"] = "xyz"
        ts3["extra2"] = "extra2"
        # break normalization: give ts1 a bogus site_id and remove source_id from ts2
        ts1["site_id"] = ObjectId()
        ts2.erase("source_id")

        save_res_code = self.db.save_data(
            ts1, mode="promiscuous", storage_mode="gridfs"
        )
        save_res_code = self.db.save_data(
            ts2, mode="promiscuous", storage_mode="gridfs"
        )
        # drop a required attribute (starttime) when saving ts3
        save_res_code = self.db.save_data(
            ts3, mode="promiscuous", storage_mode="gridfs", exclude_keys=["starttime"]
        )
        doc1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
        doc2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
        doc3 = self.db["wf_TimeSeries"].find_one({"_id": ts3["_id"]})
        doc1_str = json_util.dumps(doc1, indent=2)
        doc2_str = json_util.dumps(doc2, indent=2)
        doc3_str = json_util.dumps(doc3, indent=2)

        # default normalization test
        dbverify.main(["test_dbverify", "-t", "normalization"])
        out, err = capfd.readouterr()
        assert (
            out
            == "normalization test on normalized key= site_id  found problems\nFound broken links in  1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
        )

        # more than 1 collection to test
        dbverify.main(
            ["test_dbverify", "-t", "normalization", "-c", "wf_TimeSeries", "site"]
        )
        out, err = capfd.readouterr()
        assert (
            out
            == "WARNING:  normalization test can only be run on one collection at a time\nParsed a list with the following contents:   ['wf_TimeSeries', 'site']\nRunning test on the first item in that list\nnormalization test on normalized key= site_id  found problems\nFound broken links in  1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
        )

        # verbose mode
        dbverify.main(["test_dbverify", "-t", "normalization", "-v"])
        out, err = capfd.readouterr()
        assert (
            out
            == "check_link found the following docs in  wf_TimeSeries  with broken links to  site_id\n////////////////Doc number  1  with error///////////////\n"
            + doc1_str
            + "\n////////////////////////////////////////////////////////\ncheck_links found no undefined linking key to normalized key= site_id\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no undefined linking key to normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\ncheck_link found the following docs in  wf_TimeSeries  with undefined link keys to  source_id\n////////////////Doc number  1  with error///////////////\n"
            + doc2_str
            + "\n////////////////////////////////////////////////////////\n"
        )

        # default required test
        dbverify.main(["test_dbverify", "-t", "required"])
        out, err = capfd.readouterr()
        mmkeys = {"npts": 2, "delta": 1}
        mm_keys_str = json_util.dumps(mmkeys, indent=2)
        undef_keys = {"starttime": 1}
        undef_keys_str = json_util.dumps(undef_keys, indent=2)
        assert (
            out
            == "////Results from run_check_required on collection= wf_TimeSeries\nCollection found  3  documents with type inconsistencies\nOffending keys and number found follow:\n"
            + mm_keys_str
            + "\nCollection found  1  documents with required keys that were not defined\nOffending keys and number found follow:\n"
            + undef_keys_str
            + "\n"
        )

        # default schema_check test
        dbverify.main(["test_dbverify", "-t", "schema_check"])
        out, err = capfd.readouterr()
        mmkeys = {"npts": 2, "delta": 1}
        mm_keys_str = json_util.dumps(mmkeys, indent=2)
        undef_keys = {"extra1": 1, "extra2": 2}
        undef_keys_str = json_util.dumps(undef_keys, indent=2)
        assert (
            out
            == "check_attribute_types result for collection= wf_TimeSeries\nCollection found  3  documents with type inconsistencies\nOffending keys and number found follow:\n"
            + mm_keys_str
            + "\nCollection found  3  documents with keys not defined in the schema\nOffending keys and number found follow:\n"
            + undef_keys_str
            + "\n"
        )