def test_write_database(tmp_path, mini_testsuite, empty_alt_testsuite):
    """Exercise write_database: full write, restricted names, other schema."""
    out_dir = tmp_path / 'test_write_database'
    source_db = tsdb.Database(mini_testsuite)

    # Unrestricted write: the directory and each expected file must exist.
    tsdb.write_database(source_db, str(out_dir))
    assert out_dir.is_dir()
    for filename in ('relations', 'item', 'parse', 'result'):
        assert out_dir.joinpath(filename).is_file()
    parse_text = out_dir.joinpath('parse').read_text()
    assert parse_text == (
        '10@10@1\n'
        '20@20@0\n'
        '30@30@1\n')

    # Writing only 'item' must leave no parse or result file in place.
    tsdb.write_database(source_db, str(out_dir), names=['item'])
    assert out_dir.joinpath('item').is_file()
    for filename in ('parse', 'result'):
        assert not out_dir.joinpath(filename).is_file()

    # alt_schema drops i-wf field from mini_testsuite's schema
    alt_schema = tsdb.read_schema(empty_alt_testsuite)
    tsdb.write_database(
        source_db, str(out_dir), names=['item'], schema=alt_schema)
    rewritten_db = tsdb.Database(str(out_dir))
    assert len(source_db.schema['item']) == 4
    assert len(rewritten_db.schema['item']) == 3
    item_text = out_dir.joinpath('item').read_text()
    assert item_text == (
        '10@It rained.@1-feb-2018 15:00\n'
        '20@Rained.@01-02-18 15:00:00\n'
        '30@It snowed.@2018-2-1 (15:00:00)\n')
def test_init(self, tmp_path, mini_testsuite):
    """Check which arguments tsdb.Database accepts at construction.

    Covers three cases: no argument (TypeError), a directory that is
    not a test suite (TSDBError), and a valid test suite (no error).
    """
    # Constructing without arguments must fail.
    with pytest.raises(TypeError):
        tsdb.Database()

    # Prepare the directory outside the raises block so that only the
    # Database() call itself can satisfy the expected exception.
    not_a_testsuite = tmp_path.joinpath('not_a_testsuite')
    not_a_testsuite.mkdir()
    with pytest.raises(tsdb.TSDBError):
        tsdb.Database(str(not_a_testsuite))

    # A real test suite must load without raising.
    tsdb.Database(mini_testsuite)
def test__getitem__(self, mini_testsuite, empty_testsuite):
    """Check indexing a Database by relation name, raw and autocast."""
    raw_rows = [
        ('10', 'It rained.', '1', '1-feb-2018 15:00'),
        ('20', 'Rained.', '0', '01-02-18 15:00:00'),
        ('30', 'It snowed.', '1', '2018-2-1 (15:00:00)'),
    ]
    # All three date spellings are expected to cast to the same moment.
    when = datetime(2018, 2, 1, 15, 0)
    cast_rows = [
        (10, 'It rained.', 1, when),
        (20, 'Rained.', 0, when),
        (30, 'It snowed.', 1, when),
    ]

    db = tsdb.Database(mini_testsuite)
    assert list(db['item']) == raw_rows

    # with autocast
    db.autocast = True
    assert list(db['item']) == cast_rows

    # relation undefined
    with pytest.raises(tsdb.TSDBError):
        db['not_a_relation']

    # relation defined but its file is missing
    empty_db = tsdb.Database(empty_testsuite)
    with pytest.raises(tsdb.TSDBError):
        empty_db['item']
def test_bad_date_issue_279(tmp_path, empty_alt_testsuite):
    """Check how datetime and free-form date values are written to 'item'."""
    suite_dir = tmp_path / 'test_bad_date_issue_279'
    suite_dir.mkdir()
    schema = tsdb.read_schema(empty_alt_testsuite)
    item_fields = schema['item']
    tsdb.write_schema(suite_dir, schema)

    def write_item(date_value):
        # Overwrite the 'item' file with one record carrying date_value.
        record = (0, 'The cat meows.', date_value)
        tsdb.write(suite_dir, 'item', [record], item_fields)

    # A datetime object is expected to be read back as '8-sep-1999'.
    write_item(datetime(1999, 9, 8))
    db = tsdb.Database(suite_dir)
    assert list(db['item']) == [('0', 'The cat meows.', '8-sep-1999')]

    # A string that is not in that format is expected to come back as is.
    write_item('September 8, 1999')
    assert list(db['item']) == [
        ('0', 'The cat meows.', 'September 8, 1999'),
    ]
def select(query: str, path: util.PathLike, record_class=None):
    """
    Run a TSQL select query against an [incr tsdb()] test suite.

    The test suite at *path* is opened with autocasting enabled and
    the query is delegated to :func:`tsql.select`.

    Args:
        query (str): TSQL select query (e.g., `'i-id i-input mrs'` or
            `'* from item where readings > 0'`)
        path: path to a TSDB test suite
        record_class: alternative class for records in the selection
    Returns:
        the selection produced by :func:`tsql.select` for *query*
    """
    return tsql.select(
        query,
        tsdb.Database(path, autocast=True),
        record_class=record_class,
    )
def update_test(args):
    """
    Overwrite a test's gold profile with its current profile.

    Exactly one test must be discovered from *args*.

    Raises:
        RegressionTestError: if the number of discovered tests is not
            one, or if reading or writing the profile raises a
            TSDBError
    """
    discovered = list(_discover(args))
    if len(discovered) != 1:
        raise RegressionTestError('only 1 test may be updated at a time')

    # Only the last two fields of the 7-field test tuple are used here.
    _name, _idx, _chc, _txt, _skel, prof, gold = discovered[0]
    try:
        tsdb.write_database(tsdb.Database(prof), gold)
    except tsdb.TSDBError as exc:
        raise RegressionTestError('Failed to update gold.') from exc
def _read(path, source_codec, select, kwargs):
    """
    Yield objects decoded by *source_codec* from *path*.

    *path* may be an object with a `read` attribute, a directory, or
    any other filesystem path. A directory is opened as a TSDB
    database and queried with the TSQL *select* string; the first
    column of each row is decoded, and `None` is produced for a row
    that decodes to nothing. Everything else is passed to
    `source_codec.load()`. *kwargs* is forwarded to the codec.
    """
    if hasattr(path, 'read'):
        # File-like objects go to the codec untouched.
        items = list(source_codec.load(path, **kwargs))
    else:
        location = Path(path).expanduser()
        if not location.is_dir():
            items = list(source_codec.load(location, **kwargs))
        else:
            db = tsdb.Database(location)
            items = []
            for row in tsql.select(select, db):
                decoded = source_codec.loads(row[0], **kwargs)
                # Keep only the first decoded object for each row.
                items.append(next(iter(decoded), None))
    yield from items
def test_select_from(self, mini_testsuite):
    """Check select_from() output for each cast / autocast combination."""
    fields = ('i-id', 'i-date')
    raw_rows = [
        ('10', '1-feb-2018 15:00'),
        ('20', '01-02-18 15:00:00'),
        ('30', '2018-2-1 (15:00:00)'),
    ]
    # All three date spellings are expected to cast to the same moment.
    when = datetime(2018, 2, 1, 15, 0)
    cast_rows = [(10, when), (20, when), (30, when)]

    db = tsdb.Database(mini_testsuite)

    # autocast off: values are cast only when requested per call
    assert list(db.select_from('item', fields)) == raw_rows
    assert list(db.select_from('item', fields, cast=True)) == cast_rows

    # autocast on: values are cast with or without cast=True
    db.autocast = True
    assert list(db.select_from('item', fields)) == cast_rows
    assert list(db.select_from('item', fields, cast=True)) == cast_rows
def test_path(self, mini_testsuite):
    """Check that Database.path equals the path it was opened with."""
    expected = pathlib.Path(mini_testsuite)
    opened = tsdb.Database(mini_testsuite)
    assert opened.path == expected
def mkprof(destination, source=None, schema=None, where=None, delimiter=None,
           refresh=False, skeleton=False, full=False, gzip=False,
           quiet=False):
    """
    Build an [incr tsdb()] profile or skeleton at *destination*.

    Where the data comes from depends on *source* and *refresh*:

    - *source* is a directory -- copy data from that testsuite
    - *source* is a file -- read sentences from that file
    - *source* is `None`, *refresh* is true -- rewrite *destination*
      in place
    - *source* is `None`, *refresh* is false -- read sentences from
      stdin

    Both sentence-based modes require *schema*.

    Args:
        destination (str): path of the new testsuite
        source (str): path to a source testsuite or a file of
            sentences; if omitted and *refresh* is `False`, sentences
            are read from stdin
        schema (str): path to a relations file for the created
            testsuite; if `None` and *source* is a testsuite, the
            schema of *source* is used
        where (str): TSQL condition for filtering records; ignored
            unless *source* is a testsuite
        delimiter (str): if given, split input lines on this
            character; `"@"` means split using
            :func:`delphin.tsdb.split`; a header line with field
            names is required; ignored when the data source is not
            text lines
        refresh (bool): if `True`, rewrite the data at *destination*;
            implies *full* is `True`; ignored if *source* is not
            `None`; best combined with *schema* or *gzip*
            (default: `False`)
        skeleton (bool): if `True`, only write tsdb-core files
            (default: `False`)
        full (bool): if `True`, copy all data from the source
            testsuite; ignored if the data source is not a testsuite
            or if *skeleton* is `True` (default: `False`)
        gzip (bool): if `True`, non-empty tables will be compressed
            with gzip
        quiet (bool): if `True`, don't print summary information
    Raises:
        CommandError: if *source* is given but is neither a file nor
            a directory
    """
    destination = Path(destination).expanduser()
    source = None if source is None else Path(source).expanduser()
    schema = None if schema is None else tsdb.read_schema(schema)
    # Relation names of the source database, handed to the cleanup step.
    old_relation_files = []

    if source is None:
        if refresh:
            # work in-place on destination test suite
            db = tsdb.Database(destination)
            old_relation_files = list(db.schema)
            tsdb.write_database(db, db.path, schema=schema, gzip=gzip)
        else:
            # input is sentences on stdin
            _mkprof_from_lines(
                destination, sys.stdin, schema, delimiter, gzip)
    elif source.is_file():
        # input is a file of sentences
        with source.open() as fh:
            _mkprof_from_lines(
                destination, fh, schema, delimiter, gzip)
    elif source.is_dir():
        # input is source testsuite
        db = tsdb.Database(source)
        old_relation_files = list(db.schema)
        _mkprof_from_database(
            destination, db, schema, where, full, gzip)
    else:
        raise CommandError(f'invalid source for mkprof: {source!s}')

    _mkprof_cleanup(destination, skeleton, old_relation_files)

    if not quiet:
        _mkprof_summarize(destination, tsdb.read_schema(destination))