Example #1
0
def get_dreport_str_iter(session, download_ids=None, config=True, log=True):
    '''Returns an iterator yielding the download report (log and config) for the given
    download_ids

    :param session: an sql-alchemy session denoting a db session to a database
    :param download_ids: (list of ints or None) if None, collect statistics from all downloads run.
        Otherwise limit the output to the downloads whose ids are in the list
    :param config: boolean (default: True). Whether to show the download config
    :param log: boolean (default: True). Whether to show the download log messages
    '''
    data = infoquery(session, download_ids, config, log)
    for dwnl_id, dwnl_time, configtext, logtext in data:
        yield ''
        yield ascii_decorate('Download id: %d (%s)' %
                             (dwnl_id, str(dwnl_time)))
        # section headers (with ' N/A' when the related text is missing) are only
        # printed when both sections are requested, to disambiguate the output:
        if config and log:
            yield ''
            yield 'Configuration:%s' % (' N/A' if not configtext else '')
        if configtext:
            yield ''
            yield configtext
        if config and log:
            yield ''
            # BUG FIX: the ' N/A' suffix must reflect logtext, not configtext
            # (previously, an empty log with a non-empty config printed no 'N/A')
            yield 'Log messages:%s' % (' N/A' if not logtext else '')
        if logtext:
            yield ''
            yield logtext
Example #2
0
    def test_dstats_no_segments(self, mock_gettempdir, mock_open_in_browser, db, pytestdir):
        '''test the download-statistics command on a db where a download run
        has no downloaded segments (text output to file, then html output)'''

        # mock  a download with only inventories, i.e. with no segments downloaded
        dwnl = Download()
        dwnl.run_time = datetime(2018, 12, 2, 16, 46, 56, 472330)
        db.session.add(dwnl)
        db.session.commit()

        runner = CliRunner()

        # text output, to file
        outfile = pytestdir.newfile('.txt')
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile])

        assert not result.exception
        content = open(outfile).read()
        # the stats table of the pre-existing download (id 1) must still be written:
        assert """
                              OK        OK         Time                 Segment           Internal       
                              Gaps      Partially  Span   MSeed  Url    Not      Bad      Server         
                          OK  Overlaps  Saved      Error  Error  Error  Found    Request  Error     TOTAL
------------------------  --  --------  ---------  -----  -----  -----  -------  -------  --------  -----
www.dc1/dataselect/query   3         1          2      1      1      1        1        1         1     12
TOTAL                      3         1          2      1      1      1        1        1         1     12""" in content
        assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection)
download statistics written to """)
        # writing to file: no browser / temp dir should have been used:
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        expected_string = ascii_decorate("Download id: 2")
        # result.output below is unicode in PY2, whereas expected_string is str
        # Thus
        if PY2:
            expected_string = expected_string.decode('utf8')
        assert expected_string in content
        # the new download (id 2) has no segments, so its section only shows the
        # run time, the (missing) event query parameters and a 'no segments' notice:
        expected_string2 = """
Executed: 2018-12-02T16:46:56.472330
Event query parameters: N/A

No segments downloaded
"""
        assert expected_string2 in content[content.index(expected_string):]

        # run with html, test just that everything works fine
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html', '--dburl', db.dburl, outfile])
        assert not result.exception
Example #3
0
def get_dstats_str_iter(session, download_ids=None, maxgap_threshold=0.5):
    '''Returns an iterator yielding the download statistics and information matching the
    given parameters.
    The returned string can be joined and printed to screen or file and is made of tables
    showing the segment data on the db per data-center and download run, plus some download
    information.

    :param session: an sql-alchemy session denoting a db session to a database
    :param download_ids: (list of ints or None) if None, collect statistics from all downloads run.
        Otherwise limit the output to the downloads whose ids are in the list. In any case, in
        case of more download runs to be considered, this function will
        yield also the statistics aggregating all downloads in a table at the end
    :param maxgap_threshold: (float, default 0.5).
        Sets the threshold whereby a segment is to be
        considered with gaps or overlaps. By default is 0.5, meaning that a segment whose
        'maxgap_numsamples' value is > 0.5 has gaps, and a segment whose 'maxgap_numsamples'
        value is < 0.5 has overlaps. Such segments will be marked with a special class
        'OK Gaps Overlaps' in the table columns.
    '''
    # Benchmark: the bare minimum (with postgres on external server) request takes around 12
    # sec and 14 seconds adding all necessary information. Therefore, we choose the latter
    maxgap_bexpr = get_maxgap_sql_expr(maxgap_threshold)
    data = session.query(func.count(Segment.id), Segment.download_code,
                         Segment.datacenter_id, Segment.download_id,
                         maxgap_bexpr)
    data = filterquery(data, download_ids).group_by(Segment.download_id,
                                                    Segment.datacenter_id,
                                                    Segment.download_code,
                                                    maxgap_bexpr)

    dwlids = get_downloads(session, download_ids)
    # aggregate table only makes sense with more than one download run:
    show_aggregate_stats = len(dwlids) > 1
    dcurl = get_datacenters(session)
    if show_aggregate_stats:
        agg_statz = DownloadStats2()
    # pass the class directly as default factory (no need for a lambda wrapper):
    stas = defaultdict(DownloadStats2)
    GAP_OVLAP_CODE = DownloadStats2.GAP_OVLAP_CODE  # pylint: disable=invalid-name
    for segcount, dwn_code, dc_id, dwn_id, has_go in data:
        statz = stas[dwn_id]

        # a 200 (OK) segment with gaps/overlaps is re-classified under its own code:
        if dwn_code == 200 and has_go is True:
            dwn_code = GAP_OVLAP_CODE

        statz[dcurl[dc_id]][dwn_code] += segcount
        if show_aggregate_stats:
            agg_statz[dcurl[dc_id]][dwn_code] += segcount

    evparamlen = None  # used for alignement of strings (calculated lazily in loop below)
    for did, (druntime, evtparams) in viewitems(dwlids):
        yield ''
        yield ''
        yield ascii_decorate('Download id: %d' % did)
        yield ''
        yield 'Executed: %s' % str(druntime)
        yield "Event query parameters:%s" % (' N/A' if not evtparams else '')
        if evparamlen is None and evtparams:  # calculate eventparamlen for string alignement
            evparamlen = max(len(_) for _ in evtparams)
        for param in sorted(evtparams):
            yield ("  %-{:d}s = %s".format(evparamlen)) % (
                param, str(evtparams[param]))
        yield ''
        statz = stas.get(did)
        if statz is None:
            yield "No segments downloaded"
        else:
            # BUG FIX: typo in the user-facing message ("Downlaoaded" -> "Downloaded")
            yield "Downloaded segments per data center url (row) and response type (column):"
            yield ""
            yield str(statz)

    if show_aggregate_stats:
        yield ''
        yield ''
        yield ascii_decorate('Aggregated stats (all downloads)')
        yield ''
        yield str(agg_statz)
Example #4
0
    def test_simple_dstats(self, mock_gettempdir, mock_open_in_browser, db, pytestdir):
        '''test the download-statistics command: text output (to file and to
        stdout) and html output (to file and to a temp dir opened in browser)'''

        runner = CliRunner()

        # text output, to file
        outfile = pytestdir.newfile('.txt')
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile])

        assert not result.exception
        content = open(outfile).read()
        assert """
                              OK        OK         Time                 Segment           Internal       
                              Gaps      Partially  Span   MSeed  Url    Not      Bad      Server         
                          OK  Overlaps  Saved      Error  Error  Error  Found    Request  Error     TOTAL
------------------------  --  --------  ---------  -----  -----  -----  -------  -------  --------  -----
www.dc1/dataselect/query   3         1          2      1      1      1        1        1         1     12
TOTAL                      3         1          2      1      1      1        1        1         1     12""" in content
        assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection)
download statistics written to """)
        # writing to file: no browser / temp dir should have been used:
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        # text output, to stdout
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl])
        assert not result.exception
        assert """
                              OK        OK         Time                 Segment           Internal       
                              Gaps      Partially  Span   MSeed  Url    Not      Bad      Server         
                          OK  Overlaps  Saved      Error  Error  Error  Found    Request  Error     TOTAL
------------------------  --  --------  ---------  -----  -----  -----  -------  -------  --------  -----
www.dc1/dataselect/query   3         1          2      1      1      1        1        1         1     12
TOTAL                      3         1          2      1      1      1        1        1         1     12""" in result.output

        assert not mock_open_in_browser.called
        expected_string = ascii_decorate("Download id: 1")
        # result.output below is unicode in PY2, whereas expected_string is str
        # Thus
        if PY2:
            expected_string = expected_string.decode('utf8')
        assert expected_string in result.output
        assert not mock_gettempdir.called

        # Test html output.
        # First, implement function that parses the sta_data content into dict and check stuff:
        def jsonloads(varname, content):
            '''Extract the JSON array assigned to `varname` inside `content`
            (an html page) by scanning for balanced square brackets, and
            return it parsed via json.loads'''
            start = re.search('%s\\s*:\\s*' % varname, content).end()
            end = start
            # advance to the opening bracket of the array:
            while content[end] != '[':
                end += 1
            end += 1
            brakets = 1
            # consume until the matching closing bracket (nested arrays allowed):
            while brakets > 0:
                if content[end] == "[":
                    brakets += 1
                elif content[end] == ']':
                    brakets -= 1
                end += 1
            return json.loads(content[start:end])

        # html output, to file
        outfile = pytestdir.newfile('.html')
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html',  '--dburl', db.dburl, outfile])

        assert not result.exception
        content = open(outfile).read()

        sta_data = jsonloads('sta_data', content)
        networks = jsonloads('networks', content)
        # sta_data is a flat list alternating station name and station data:
        assert len(sta_data) == db.session.query(Station.id).count() * 2
        for i in range(0, len(sta_data), 2):
            staname = sta_data[i]
            data = sta_data[i+1]
            # Each sta is: [sta_name, [staid, stalat, stalon, sta_dcid,
            #                        d_id1, [code1, num_seg1 , ..., codeN, num_seg],
            #                        d_id2, [code1, num_seg1 , ..., codeN, num_seg],
            #                       ]
            if staname == 'S1':
                assert data[0] == 1
                assert data[1] == 11  # lat
                assert data[2] == 11  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N1')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                assert sorted(data[6][1::2]) == [1, 1, 1, 1]
            elif staname == 'S2a':
                assert data[0] == 2
                assert data[1] == 22.1  # lat
                assert data[2] == 22.1  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N1')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                assert sorted(data[6][1::2]) == [1, 1, 2]
            elif staname == 'S2b':
                assert data[0] == 3
                assert data[1] == 22.2  # lat
                assert data[2] == 22.2  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N2')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                assert sorted(data[6][1::2]) == [1, 1, 1, 1]
            else:
                raise Exception('station should not be there: %s' % staname)

        assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection)
download statistics written to """)
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        # html output, to file, setting maxgap to 0.2, so that S1a' has all three ok segments
        # with gaps
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '-g', '0.15', '--html',
                                     '--dburl', db.dburl, outfile])

        assert not result.exception

        content = open(outfile).read()
        # parse the sta_data content into dict and check stuff:
        sta_data = jsonloads('sta_data', content)
        networks = jsonloads('networks', content)
        assert len(sta_data) == db.session.query(Station.id).count() * 2
        for i in range(0, len(sta_data), 2):
            staname = sta_data[i]
            data = sta_data[i+1]
            # Each sta is: [sta_name, [staid, stalat, stalon, sta_dcid,
            #                        d_id1, [code1, num_seg1 , ..., codeN, num_seg],
            #                        d_id2, [code1, num_seg1 , ..., codeN, num_seg],
            #                       ]
            if staname == 'S1':
                assert data[0] == 1
                assert data[1] == 11  # lat
                assert data[2] == 11  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N1')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                assert sorted(data[6][1::2]) == [1, 1, 1, 1]
            elif staname == 'S2a':
                assert data[0] == 2
                assert data[1] == 22.1  # lat
                assert data[2] == 22.1  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N1')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                # with the lower gap threshold the gap/overlap category changes:
                assert sorted(data[6][1::2]) == [1, 3]
            elif staname == 'S2b':
                assert data[0] == 3
                assert data[1] == 22.2  # lat
                assert data[2] == 22.2  # lon
                assert data[3] == 1  # dc id
                assert data[4] == networks.index('N2')
                assert data[5] == 1  # download id (only 1)
                # assert the correct segments categories
                # (list length is the number of categories,
                # each element is the num of segments for that category,
                # and the sum of all elements must be 4)
                assert sorted(data[6][1::2]) == [1, 1, 1, 1]
            else:
                raise Exception('station should not be there: %s' % staname)

        assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection)
download statistics written to """)
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        # html output, to temp file
        mytmpdir = pytestdir.makedir()
        assert not os.listdir(mytmpdir)
        # with no output file the command writes to the (mocked) temp dir:
        mock_gettempdir.side_effect = lambda *a, **v: mytmpdir
        result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html', '--dburl',  db.dburl])
        assert not result.exception
        # ... and opens the written file in the browser:
        assert mock_open_in_browser.called
        assert mock_gettempdir.called
        assert os.listdir(mytmpdir) == ['s2s_dstats.html']
Example #5
0
def process(dburl, pyfile, funcname=None, config=None, outfile=None, log2file=False,
            verbose=False, append=False, **param_overrides):
    """
        Process the segments saved in the db and optionally save the results into `outfile`
        in .csv format. Calls F, the function named `funcname` defined in `pyfile`.
        If `outfile` is given, then F should return lists/dicts to be written as
            csv row.
        If `outfile` is not given, then the returned values of F will be ignored
            (F is supposed to process data without returning a value, e.g. save processed
            miniSeed to the FileSystem)

        :param log2file: if True, all messages with level >= logging.INFO will be printed to
            a log file named  <outfile>.<now>.log  (where now is the current date and time in iso
            format) or <pyfile>.<now>.log, if <outfile> is None

        :param verbose: if True, all messages with level logging.INFO, logging.ERROR and
            logging.CRITICAL will be printed to the screen, as well as a progress-bar showing the
            eta (estimated time available).

        :param param_overrides: parameters that will override the yaml config. Nested dict will be
            merged, not replaced

        :return: 0 on success (any failure raises, see below)
    """
    # implementation details: this function returns 0 on success and raises otherwise.
    # First, it can raise ValueError for a bad parameter (checked before starting db session and
    # logger),
    # Then, during processing, each segment error which is not (ImportError, NameError,
    # AttributeError, SyntaxError, TypeError) is logged as warning and the program continues.
    # Other exceptions are raised, caught here and logged with level CRITICAL, with the stack trace:
    # this allows to help users to discovers possible bugs in pyfile, without waiting for
    # the whole process to finish

    # checks dic values (modify in place) and returns dic value(s) needed here:
    session, pyfunc, funcname, config_dict = \
        load_config_for_process(dburl, pyfile, funcname, config, outfile, **param_overrides)

    # configure logging before any output: file handler (optional) + console (if verbose):
    loghandlers = configlog4processing(logger, (outfile or pyfile) if log2file else None, verbose)
    try:
        abp = os.path.abspath
        # log a summary banner of all input/output locations (dburl is obfuscated):
        info = [
            "Input database:      %s" % secure_dburl(dburl),
            "Processing function: %s:%s" % (abp(pyfile), funcname),
            "Config. file:        %s" % (abp(config) if config else 'n/a'),
            "Log file:            %s" % (abp(loghandlers[0].baseFilename) if log2file else 'n/a'),
            "Output file:         %s" % (abp(outfile) if outfile else 'n/a')
        ]
        logger.info(ascii_decorate("\n".join(info)))

        stime = time.time()
        writer_options = config_dict.get('advanced_settings', {}).get('writer_options', {})
        run_process(session, pyfunc, get_writer(outfile, append, writer_options),
                    config_dict, verbose)
        logger.info("Completed in %s", str(totimedelta(stime)))
        return 0  # contrarily to download, an exception should always raise and log as error
        # with the stack trace
        # (this includes pymodule exceptions e.g. TypeError)
    except KeyboardInterrupt:
        logger.critical("Aborted by user")  # see comment above
        raise
    except:  # @IgnorePep8 pylint: disable=broad-except
        # log with full stack trace, then re-raise to the caller:
        logger.critical("Process aborted", exc_info=True)  # see comment above
        raise
    finally:
        # always release the db session, whatever happened above:
        closesession(session)
    def test_simple_dreport(self, mock_gettempdir, mock_open_in_browser, db,
                            pytestdir):
        '''test the download-report command: no flags, both flags, output to
        file and to stdout, and flag/option misuse'''

        # test "no flags" case:
        runner = CliRunner()
        # text output, to file
        outfile = pytestdir.newfile('.txt')
        result = runner.invoke(
            cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile])
        assert not result.exception
        expected_string = ascii_decorate(
            "Download id: 1 (%s)" %
            str(db.session.query(Download.run_time).first()[0]))
        # result.output below is unicode in PY2, whereas expected_string is str
        # Thus
        if PY2:
            expected_string = expected_string.decode('utf8')

        # without --config/--log only the decorated header is written:
        content = open(outfile).read()
        assert expected_string.strip() == content.strip()

        # test "normal" case:
        runner = CliRunner()
        # text output, to file
        outfile = pytestdir.newfile('.txt')
        result = runner.invoke(cli, [
            'utils', self.CMD_NAME, '--config', '--log', '--dburl', db.dburl,
            outfile
        ])
        assert not result.exception
        expected_string = ascii_decorate(
            "Download id: 1 (%s)" %
            str(db.session.query(Download.run_time).first()[0]))
        # with both flags, empty config/log show explicit 'N/A' sections:
        expected_string += """

Configuration: N/A

Log messages: N/A"""
        # result.output below is unicode in PY2, whereas expected_string is str
        # Thus
        if PY2:
            expected_string = expected_string.decode('utf8')

        content = open(outfile).read()
        assert expected_string in content
        assert result.output.startswith(
            """Fetching data, please wait (this might take a while depending on the db size and connection)
download report written to """)
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        # calling with no output file (print to screen, i.e. result.output):
        result = runner.invoke(
            cli,
            ['utils', self.CMD_NAME, '--log', '--config', '--dburl', db.dburl])
        assert not result.exception
        content = result.output
        assert expected_string in content
        assert result.output.startswith(
            """Fetching data, please wait (this might take a while depending on the db size and connection)
""")
        assert not mock_open_in_browser.called
        assert not mock_gettempdir.called

        expected = """Fetching data, please wait (this might take a while depending on the db size and connection)
"""
        # try with flags:
        # NOTE(review): here '--log' follows '--dburl', so it is presumably consumed
        # as the --dburl value — looks like a deliberate misuse check (only the
        # 'Fetching' line is expected in the output); confirm intent
        result = runner.invoke(
            cli, ['utils', self.CMD_NAME, '--dburl', '--log', db.dburl])
        assert expected in result.output
        assert not result.output[result.output.index(expected) +
                                 len(expected):]

        # try with flags:
        result = runner.invoke(
            cli, ['utils', self.CMD_NAME, '--dburl', '--config', db.dburl])
        assert expected in result.output
        assert not result.output[result.output.index(expected) +
                                 len(expected):]