def dot_legacymodels(self, line):
    '''load legacy models
    <generator> <table> </path/to/models.pkl.gz>

    Create a Crosscat generator named <generator> for the table
    <table> from the legacy models stored in
    </path/to/models.pkl.gz>.
    '''
    # XXX Lousy, lousy tokenizer: plain whitespace splitting, so none of
    # the three arguments can contain whitespace.
    args = line.split()
    if len(args) == 3:
        generator, table, pathname = args
    else:
        # Wrong argument count: print the usage line and do nothing else.
        self.stdout.write(
            'Usage: .legacymodels <generator> <table>'
            ' </path/to/models.pkl.gz>\n')
        return
    try:
        bayeslite.bayesdb_load_legacy_models(
            self._bdb, generator, table, self._metamodel, pathname,
            create=True)
    except IOError as err:
        # I/O failures are reported as a single-line message.
        self.stdout.write('%s\n' % (err,))
    except Exception:
        # Any other failure is reported as a full traceback on the
        # command's stdout instead of propagating to the caller.
        self.stdout.write(traceback.format_exc())
def test_legacy_models_slow():
    '''Load legacy models for the dha table and check two ESTIMATE queries.

    The legacy-model load is expected to raise ValueError when attempted
    before the CSV has been read into 'dha', and to go through afterwards.
    '''
    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    engine = crosscat.LocalEngine.LocalEngine(seed=0)
    bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

    def load_models():
        # The identical call is issued before and after the CSV import.
        bayeslite.bayesdb_load_legacy_models(
            bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)

    with pytest.raises(ValueError):
        load_models()
    with open(dha_csv, 'rU') as csv_file:
        read_csv.bayesdb_read_csv(
            bdb, 'dha', csv_file, header=True, create=True)
    load_models()

    # Make sure guessing also works.
    bdb.execute('create generator dha_cc0 for dha using crosscat(guess(*))')

    # Need to be able to overwrite existing codebook, hence the same
    # load issued twice.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    for _ in range(2):
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)

    similarity_query = (
        ' ESTIMATE name FROM dha_cc'
        ' ORDER BY SIMILARITY TO (name = ?) DESC'
        ' LIMIT 10 '
    )
    most_similar = [(name,) for name in (
        'Albany NY',
        'Scranton PA',
        'United States US',
        'Norfolk VA',
        'Reading PA',
        'Salisbury MD',
        'Louisville KY',
        'Cleveland OH',
        'Covington KY',
        'Akron OH',
    )]
    with bdb.savepoint():
        rows = bdb.execute(similarity_query, ('Albany NY',)).fetchall()
        assert rows == most_similar

    # Tickles an issue in case-folding of column names.
    predprob_query = (
        ' ESTIMATE name FROM dha_cc'
        ' ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC'
        ' LIMIT 10 '
    )
    least_probable = [(name,) for name in (
        'McAllen TX',
        'Worcester MA',
        'Beaumont TX',
        'Temple TX',
        'Corpus Christi TX',
        'Takoma Park MD',
        'Kingsport TN',
        'Bangor ME',
        'Lebanon NH',
        'Panama City FL',
    )]
    with bdb.savepoint():
        rows = bdb.execute(predprob_query).fetchall()
        assert rows == least_probable
def test_legacy_models__ci_slow():
    """Load legacy models for the dha table and check two ESTIMATE queries.

    The legacy-model load is expected to raise ValueError when attempted
    before the CSV has been read into "dha", and to go through afterwards.
    """
    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    engine = crosscat.LocalEngine.LocalEngine(seed=0)
    bayeslite.bayesdb_register_metamodel(bdb, CrosscatMetamodel(engine))

    def load_models():
        # The identical call is issued before and after the CSV import.
        bayeslite.bayesdb_load_legacy_models(
            bdb, "dha_cc", "dha", "crosscat", dha_models, create=True
        )

    with pytest.raises(ValueError):
        load_models()
    with open(dha_csv, "rU") as csv_file:
        read_csv.bayesdb_read_csv(
            bdb, "dha", csv_file, header=True, create=True
        )
    load_models()

    # Make sure guessing also works.
    bdb.execute("create generator dha_cc0 for dha using crosscat(guess(*))")

    # Need to be able to overwrite existing codebook, hence the same
    # load issued twice.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    for _ in range(2):
        bayeslite.bayesdb_load_codebook_csv_file(bdb, "dha", dha_codebook)

    similarity_query = (
        " ESTIMATE name FROM dha_cc"
        " ORDER BY SIMILARITY TO (name = ?) DESC"
        " LIMIT 10 "
    )
    most_similar = [(name,) for name in (
        "Albany NY",
        "Scranton PA",
        "United States US",
        "Norfolk VA",
        "Reading PA",
        "Salisbury MD",
        "Louisville KY",
        "Cleveland OH",
        "Covington KY",
        "Akron OH",
    )]
    with bdb.savepoint():
        rows = bdb.execute(similarity_query, ("Albany NY",)).fetchall()
        assert rows == most_similar

    # Tickles an issue in case-folding of column names.
    predprob_query = (
        " ESTIMATE name FROM dha_cc"
        " ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC"
        " LIMIT 10 "
    )
    least_probable = [(name,) for name in (
        "McAllen TX",
        "Worcester MA",
        "Beaumont TX",
        "Temple TX",
        "Corpus Christi TX",
        "Takoma Park MD",
        "Kingsport TN",
        "Bangor ME",
        "Lebanon NH",
        "Panama City FL",
    )]
    with bdb.savepoint():
        rows = bdb.execute(predprob_query).fetchall()
        assert rows == least_probable
def test_legacy_models__ci_slow():
    '''Load legacy models for the dha table and check two ESTIMATE queries.

    The legacy-model load is expected to raise ValueError when attempted
    before the CSV has been read into 'dha', and to go through afterwards.
    '''
    bdb = bayeslite.bayesdb_open(builtin_metamodels=False)
    engine = crosscat.LocalEngine.LocalEngine(seed=0)
    crosscat_metamodel = CrosscatMetamodel(engine)
    bayeslite.bayesdb_register_metamodel(bdb, crosscat_metamodel)

    def load_models():
        # The identical call is issued before and after the CSV import.
        bayeslite.bayesdb_load_legacy_models(
            bdb, 'dha_cc', 'dha', 'crosscat', dha_models, create=True)

    def names_as_rows(*names):
        # Query results come back as one-element tuples, one per row.
        return [(name,) for name in names]

    with pytest.raises(ValueError):
        load_models()
    with open(dha_csv, 'rU') as csv_file:
        read_csv.bayesdb_read_csv(
            bdb, 'dha', csv_file, header=True, create=True)
    load_models()

    # Make sure guessing also works.
    bdb.execute('create generator dha_cc0 for dha using crosscat(guess(*))')

    # Need to be able to overwrite existing codebook, hence the same
    # load issued twice.
    #
    # XXX Not sure this is the right API.  What if overwrite is a
    # mistake?
    for _ in range(2):
        bayeslite.bayesdb_load_codebook_csv_file(bdb, 'dha', dha_codebook)

    similarity_query = (
        ' ESTIMATE name FROM dha_cc'
        ' ORDER BY SIMILARITY TO (name = ?) DESC'
        ' LIMIT 10 '
    )
    with bdb.savepoint():
        rows = bdb.execute(similarity_query, ('Albany NY',)).fetchall()
        assert rows == names_as_rows(
            'Albany NY',
            'Scranton PA',
            'United States US',
            'Norfolk VA',
            'Reading PA',
            'Salisbury MD',
            'Louisville KY',
            'Cleveland OH',
            'Covington KY',
            'Akron OH',
        )

    # Tickles an issue in case-folding of column names.
    predprob_query = (
        ' ESTIMATE name FROM dha_cc'
        ' ORDER BY PREDICTIVE PROBABILITY OF mdcr_spnd_amblnc ASC'
        ' LIMIT 10 '
    )
    with bdb.savepoint():
        rows = bdb.execute(predprob_query).fetchall()
        assert rows == names_as_rows(
            'McAllen TX',
            'Worcester MA',
            'Beaumont TX',
            'Temple TX',
            'Corpus Christi TX',
            'Takoma Park MD',
            'Kingsport TN',
            'Bangor ME',
            'Lebanon NH',
            'Panama City FL',
        )