def write_input(self, mode='a', backup=True, sleep=0, excl=True):
    """Create calculation dir(s) for each parameter set and write input
    files based on ``templates``. Write sqlite database storing all
    relevant parameters. Write (bash) shell script to start all
    calculations (run locally or submit batch job file, depending on
    ``machine.subcmd``).

    Parameters
    ----------
    mode : str, optional
        Fine tune how to write input files (based on ``templates``) to calc
        dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change
        the base dir calc_foo at all, only the subdirs for each calc.
        {'a', 'w'}

        | 'a': Append mode (default). If a previous database is found, then
        |      subsequent calculations are numbered based on the last 'idx'.
        |      calc_foo/0 # old
        |      calc_foo/1 # old
        |      calc_foo/2 # new
        |      calc_foo/3 # new
        | 'w': Write mode. The target dirs are purged and overwritten. Also,
        |      the database (self.dbfn) is overwritten. Use this to
        |      iteratively tune your inputs, NOT for working on already
        |      present results!
        |      calc_foo/0 # new
        |      calc_foo/1 # new
    backup : bool, optional
        Before writing anything, do a backup of self.calc_dir if it already
        exists.
    sleep : int, optional
        For the script to start (submit) all jobs: time in seconds for the
        shell sleep(1) command.
    excl : bool
        If in append mode, a file <calc_root>/excl_push with all indices of
        calculations from old revisions is written. Can be used with
        ``rsync --exclude-from=excl_push`` when pushing appended new
        calculations to a cluster.
    """
    assert mode in ['a', 'w'], "Unknown mode: '%s'" % mode
    if os.path.exists(self.dbfn):
        if backup:
            common.backup(self.dbfn)
        if mode == 'w':
            os.remove(self.dbfn)
    have_new_db = not os.path.exists(self.dbfn)
    common.makedirs(self.calc_root)
    # This call creates the file ``self.dbfn`` if it doesn't exist.
    sqldb = SQLiteDB(self.dbfn, table=self.db_table)
    # max_idx: counter for calc dir numbering; revision: counts append-mode
    # invocations of this method (0 for a fresh database).
    revision = 0
    if have_new_db:
        max_idx = -1
    else:
        if mode == 'a':
            if sqldb.has_column('idx'):
                max_idx = sqldb.execute("select max(idx) from %s"
                                        % self.db_table).fetchone()[0]
            else:
                # Python 3: StandardError was removed, raise Exception.
                raise Exception("database '%s': table '%s' has no "
                                "column 'idx', don't know how to number calcs"
                                % (self.dbfn, self.db_table))
            if sqldb.has_column('revision'):
                revision = int(sqldb.get_single("select max(revision) from %s"
                                                % self.db_table)) + 1
        elif mode == 'w':
            max_idx = -1
    sql_records = []
    hostnames = []
    for imach, machine in enumerate(self.machines):
        hostnames.append(machine.hostname)
        calc_dir = pj(self.calc_root,
                      self.calc_dir_prefix + '_%s' % machine.hostname)
        if os.path.exists(calc_dir):
            if backup:
                common.backup(calc_dir)
            if mode == 'w':
                common.system("rm -r %s" % calc_dir, wait=True)
        run_txt = "here=$(pwd)\n"
        for _idx, params in enumerate(self.params_lst):
            params = common.flatten(params)
            idx = max_idx + _idx + 1
            calc_subdir = pj(calc_dir, str(idx))
            extra_dct = {'revision': revision,
                         'study_name': self.study_name,
                         'idx': idx,
                         'calc_name': self.study_name + "_run%i" % idx,
                         }
            # Python 3: dict.iteritems() -> dict.items()
            extra_params = [SQLEntry(key=key, sqlval=val)
                            for key, val in extra_dct.items()]
            # templates[:] to copy b/c they may be modified in Calculation
            calc = Calculation(machine=machine,
                               templates=self.templates[:],
                               params=params + extra_params,
                               calc_dir=calc_subdir,
                               )
            if mode == 'w' and os.path.exists(calc_subdir):
                shutil.rmtree(calc_subdir)
            calc.write_input()
            run_txt += "cd %i && %s %s && cd $here && sleep %i\n" \
                % (idx, machine.subcmd, machine.get_jobfile_basename(), sleep)
            # Parameters are identical for every machine, so collect the
            # sql records only once (first machine).
            if imach == 0:
                sql_records.append(calc.get_sql_record())
        common.file_write(pj(calc_dir, 'run.sh'), run_txt)
    for record in sql_records:
        record['hostname'] = SQLEntry(sqlval=','.join(hostnames))
    # For incomplete parameters: collect header parts from all records and
    # make a set = unique entries.
    raw_header = [(key, entry.sqltype.upper()) for record in sql_records
                  for key, entry in record.items()]
    header = list(set(raw_header))
    if have_new_db:
        sqldb.create_table(header)
    else:
        for record in sql_records:
            for key, entry in record.items():
                if not sqldb.has_column(key):
                    sqldb.add_column(key, entry.sqltype.upper())
    for record in sql_records:
        cmd = "insert into %s (%s) values (%s)" \
            % (self.db_table,
               ",".join(record.keys()),
               ",".join(['?'] * len(record)))
        # Python 3: dict.itervalues() -> dict.values()
        sqldb.execute(cmd, tuple(entry.sqlval for entry in record.values()))
    if excl and revision > 0 and sqldb.has_column('revision'):
        # Consistency fix: query self.db_table like every other statement
        # here instead of the previously hardcoded table name 'calc'.
        old_idx_lst = [str(x) for x, in
                       sqldb.execute("select idx from %s where revision < ?"
                                     % self.db_table, (revision,))]
        common.file_write(pj(self.calc_root, 'excl_push'),
                          '\n'.join(old_idx_lst))
    sqldb.finish()
def write_input(self, mode='a', backup=True, sleep=0, excl=True):
    """Create calculation dir(s) for each parameter set and write input
    files based on ``templates``. Write sqlite database storing all
    relevant parameters. Write (bash) shell script to start all
    calculations (run locally or submit batch job file, depending on
    ``machine.subcmd``).

    Parameters
    ----------
    mode : str, optional
        Fine tune how to write input files (based on ``templates``) to calc
        dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change
        the base dir calc_foo at all, only the subdirs for each calc.
        {'a', 'w'}

        | 'a': Append mode (default). If a previous database is found, then
        |      subsequent calculations are numbered based on the last 'idx'.
        |      calc_foo/0 # old
        |      calc_foo/1 # old
        |      calc_foo/2 # new
        |      calc_foo/3 # new
        | 'w': Write mode. The target dirs are purged and overwritten. Also,
        |      the database (self.dbfn) is overwritten. Use this to
        |      iteratively tune your inputs, NOT for working on already
        |      present results!
        |      calc_foo/0 # new
        |      calc_foo/1 # new
    backup : bool, optional
        Before writing anything, do a backup of self.calc_dir if it already
        exists.
    sleep : int, optional
        For the script to start (submit) all jobs: time in seconds for the
        shell sleep(1) command.
    excl : bool
        If in append mode, a file <calc_root>/excl_push with all indices of
        calculations from old revisions is written. Can be used with
        ``rsync --exclude-from=excl_push`` when pushing appended new
        calculations to a cluster.
    """
    assert mode in ['a', 'w'], "Unknown mode: '%s'" % mode
    # In write mode, remove the old database (after an optional backup) so
    # that a fresh one is created below.
    if os.path.exists(self.dbfn):
        if backup:
            common.backup(self.dbfn)
        if mode == 'w':
            os.remove(self.dbfn)
    have_new_db = not os.path.exists(self.dbfn)
    common.makedirs(self.calc_root)
    # This call creates a file ``self.dbfn`` if it doesn't exist.
    sqldb = SQLiteDB(self.dbfn, table=self.db_table)
    # max_idx: counter for calc dir numbering; revision: numbers repeated
    # append-mode invocations (0 for a fresh database).
    revision = 0
    if have_new_db:
        max_idx = -1
    else:
        if mode == 'a':
            # Continue numbering after the highest existing 'idx'.
            if sqldb.has_column('idx'):
                max_idx = sqldb.execute("select max(idx) from %s" \
                                        %self.db_table).fetchone()[0]
            else:
                raise Exception(
                    "database '%s': table '%s' has no "
                    "column 'idx', don't know how to number calcs"
                    % (self.dbfn, self.db_table))
            if sqldb.has_column('revision'):
                revision = int(
                    sqldb.get_single("select max(revision) \
from %s" % self.db_table)) + 1
        elif mode == 'w':
            max_idx = -1
    sql_records = []
    hostnames = []
    for imach, machine in enumerate(self.machines):
        hostnames.append(machine.hostname)
        # One base dir per machine: <calc_root>/<prefix>_<hostname>/
        calc_dir = pj(self.calc_root, self.calc_dir_prefix + \
                      '_%s' %machine.hostname)
        if os.path.exists(calc_dir):
            if backup:
                common.backup(calc_dir)
            if mode == 'w':
                common.system("rm -r %s" % calc_dir, wait=True)
        run_txt = "here=$(pwd)\n"
        for _idx, params in enumerate(self.params_lst):
            params = common.flatten(params)
            idx = max_idx + _idx + 1
            calc_subdir = pj(calc_dir, str(idx))
            # Bookkeeping parameters stored alongside the user parameters.
            extra_dct = \
                {'revision': revision,
                 'study_name': self.study_name,
                 'idx': idx,
                 'calc_name' : self.study_name + "_run%i" %idx,
                 }
            extra_params = [SQLEntry(key=key, sqlval=val) for key,val in \
                            extra_dct.items()]
            # templates[:] to copy b/c they may be modified in Calculation
            calc = Calculation(
                machine=machine,
                templates=self.templates[:],
                params=params + extra_params,
                calc_dir=calc_subdir,
                )
            if mode == 'w' and os.path.exists(calc_subdir):
                shutil.rmtree(calc_subdir)
            calc.write_input()
            # Accumulate one submit line per calc in the run script.
            run_txt += "cd %i && %s %s && cd $here && sleep %i\n" %(idx,\
                machine.subcmd, machine.get_jobfile_basename(), sleep)
            # Params are identical per machine; collect records only once.
            if imach == 0:
                sql_records.append(calc.get_sql_record())
        common.file_write(pj(calc_dir, 'run.sh'), run_txt)
    for record in sql_records:
        record['hostname'] = SQLEntry(sqlval=','.join(hostnames))
    # for incomplete parameters: collect header parts from all records and
    # make a set = unique entries
    raw_header = [(key, entry.sqltype.upper()) for record in sql_records \
                  for key, entry in record.items()]
    header = list(set(raw_header))
    if have_new_db:
        sqldb.create_table(header)
    else:
        # Existing database: add any columns not yet present.
        for record in sql_records:
            for key, entry in record.items():
                if not sqldb.has_column(key):
                    sqldb.add_column(key, entry.sqltype.upper())
    for record in sql_records:
        cmd = "insert into %s (%s) values (%s)"\
            %(self.db_table,
              ",".join(list(record.keys())),
              ",".join(['?']*len(list(record.keys()))))
        sqldb.execute(cmd, tuple(entry.sqlval for entry in record.values()))
    # NOTE(review): this query hardcodes table name 'calc' while all other
    # statements use self.db_table — looks inconsistent; confirm intent.
    if excl and revision > 0 and sqldb.has_column('revision'):
        old_idx_lst = [
            str(x)
            for x, in sqldb.execute(
                "select idx from calc where \
revision < ?", (
                    revision, ))
        ]
        common.file_write(pj(self.calc_root, 'excl_push'),
                          '\n'.join(old_idx_lst))
    sqldb.finish()
def test_sql():
    """Exercise the SQLiteDB wrapper: table creation, inserts, fetch
    variants, header/column helpers and list/array/dict extraction.

    If the ``sqlite3`` command line tool is installed, its output is also
    compared against the database content.
    """
    # Look for the sqlite3 command line tool on the executable search path.
    # shutil.which() (Python 3.3+) searches $PATH; the previous code looped
    # over sys.path, which holds Python *module* directories and would
    # practically never contain the binary.
    exe = shutil.which('sqlite3')
    have_sqlite3 = exe is not None
    if have_sqlite3:
        print("found:", exe)

    # --- SQLiteDB ----------------------------------------------------
    dbfn = pj(testdir, 'test.db')
    if os.path.exists(dbfn):
        os.remove(dbfn)
    header = [('idx', 'INTEGER'), ('foo', 'REAL'), ('bar', 'TEXT')]
    db = SQLiteDB(dbfn, table='calc')
    db.execute("CREATE TABLE calc (%s)" % ','.join("%s %s" % (x[0], x[1])
                                                   for x in header))
    vals = [[0, 1.1, 'a'], [1, 2.2, 'b'], [2, 3.3, 'c']]
    for lst in vals:
        db.execute("INSERT INTO calc (idx, foo, bar) VALUES (?,?,?)",
                   tuple(lst))
    db.commit()
    # get_max_rowid
    assert db.get_max_rowid() == 3
    # has_table
    assert db.has_table('calc')
    assert not db.has_table('foo')
    # has_column
    assert db.has_column('idx')
    assert not db.has_column('grrr')
    # get_single
    assert float(db.get_single("select foo from calc where idx==0")) == 1.1
    assert header == db.get_header()
    if have_sqlite3:
        # call sqlite3, the cmd line interface
        assert common.backtick("sqlite3 %s 'select * from calc'" % dbfn) \
            == '0|1.1|a\n1|2.2|b\n2|3.3|c\n'
    # fetchall() returns all rows as a list of tuples
    ret = db.execute("select * from calc").fetchall()
    for idx, lst in enumerate(vals):
        assert list(ret[idx]) == lst
    # the cursor is iterable, yielding one tuple per row
    itr = db.execute("select * from calc")
    for idx, tup in enumerate(itr):
        assert list(tup) == vals[idx]
    # fetchall() / fetchone() on a filtered query
    assert db.execute("select * from calc where idx==0").fetchall() == \
        [tuple(vals[0])]
    assert db.execute("select * from calc where idx==0").fetchone() == \
        tuple(vals[0])
    assert db.execute("select bar from calc where idx==0").fetchone()[0] == \
        'a'
    # get_list1d(), get_array1d(), get_array()
    assert db.get_list1d("select idx from calc") == [0, 1, 2]
    np.testing.assert_array_equal(db.get_array1d("select idx from calc"),
                                  np.array([0, 1, 2]))
    np.testing.assert_array_equal(db.get_array("select idx from calc"),
                                  np.array([0, 1, 2])[:, None])
    np.testing.assert_array_equal(
        db.get_array("select idx,foo from calc"),
        np.array(vals, dtype='S3')[:, :2].astype(float))
    # add_column(), fill with values
    db.add_column('baz', 'TEXT')
    add_header = [('baz', 'TEXT')]
    header += add_header
    assert db.get_header() == header
    db.execute("UPDATE %s SET baz='xx' where idx==0" % db.table)
    db.execute("UPDATE %s SET baz='yy' where idx==1" % db.table)
    db.execute("UPDATE %s SET baz=? where idx==2" % db.table, ('zz',))
    db.commit()
    if have_sqlite3:
        print(common.backtick("sqlite3 %s 'select * from calc'" % dbfn))
    print(db.execute("select baz from calc").fetchall())
    assert db.execute("select baz from calc").fetchall() == \
        [('xx',), ('yy',), ('zz',)]
    # add even more cols with add_columns()
    add_header = [('bob', 'TEXT'), ('alice', 'BLOB')]
    header += add_header
    db.add_columns(add_header)
    assert db.get_header() == header
    # create_table()
    dbfn2 = pj(testdir, 'test2.db')
    header2 = [('a', 'REAL'), ('b', 'TEXT')]
    db2 = SQLiteDB(dbfn2, table='foo')
    db2.create_table(header2)
    assert db2.get_header() == header2
    # get_dict()
    dct = db.get_dict("select foo,bar from calc")
    cols = [x[0] for x in db.get_header()]
    for key in ['foo', 'bar']:
        assert key in cols
    foo = db.get_list1d("select foo from calc")
    bar = db.get_list1d("select bar from calc")
    assert foo == dct['foo']
    assert bar == dct['bar']
def test_sql():
    """Exercise the SQLiteDB wrapper: table creation, inserts, fetch
    variants, header/column helpers and list/array/dict extraction.

    If the ``sqlite3`` command line tool is installed, its output is also
    compared against the database content.
    """
    # Python 3 port of the legacy variant: print statements converted to
    # print() calls and u'' literals dropped (str is unicode in Python 3).
    # The sqlite3 binary lookup now uses shutil.which(), which searches
    # $PATH; the old loop over sys.path inspected Python *module*
    # directories and would practically never find the executable.
    exe = shutil.which('sqlite3')
    have_sqlite3 = exe is not None
    if have_sqlite3:
        print("found:", exe)

    # --- SQLiteDB ----------------------------------------------------
    dbfn = pj(testdir, 'test.db')
    if os.path.exists(dbfn):
        os.remove(dbfn)
    header = [('idx', 'INTEGER'), ('foo', 'REAL'), ('bar', 'TEXT')]
    db = SQLiteDB(dbfn, table='calc')
    db.execute("CREATE TABLE calc (%s)" % ','.join("%s %s" % (x[0], x[1])
                                                   for x in header))
    vals = [[0, 1.1, 'a'], [1, 2.2, 'b'], [2, 3.3, 'c']]
    for lst in vals:
        db.execute("INSERT INTO calc (idx, foo, bar) VALUES (?,?,?)",
                   tuple(lst))
    db.commit()
    # get_max_rowid
    assert db.get_max_rowid() == 3
    # has_table
    assert db.has_table('calc')
    assert not db.has_table('foo')
    # has_column
    assert db.has_column('idx')
    assert not db.has_column('grrr')
    # get_single
    assert float(db.get_single("select foo from calc where idx==0")) == 1.1
    assert header == db.get_header()
    if have_sqlite3:
        # call sqlite3, the cmd line interface
        assert common.backtick("sqlite3 %s 'select * from calc'" % dbfn) \
            == '0|1.1|a\n1|2.2|b\n2|3.3|c\n'
    # fetchall() returns all rows as a list of tuples
    ret = db.execute("select * from calc").fetchall()
    for idx, lst in enumerate(vals):
        assert list(ret[idx]) == lst
    # the cursor is iterable, yielding one tuple per row
    itr = db.execute("select * from calc")
    for idx, tup in enumerate(itr):
        assert list(tup) == vals[idx]
    # fetchall() / fetchone() on a filtered query
    assert db.execute("select * from calc where idx==0").fetchall() == \
        [tuple(vals[0])]
    assert db.execute("select * from calc where idx==0").fetchone() == \
        tuple(vals[0])
    assert db.execute("select bar from calc where idx==0").fetchone()[0] == \
        'a'
    # get_list1d(), get_array1d(), get_array()
    assert db.get_list1d("select idx from calc") == [0, 1, 2]
    np.testing.assert_array_equal(db.get_array1d("select idx from calc"),
                                  np.array([0, 1, 2]))
    np.testing.assert_array_equal(db.get_array("select idx from calc"),
                                  np.array([0, 1, 2])[:, None])
    np.testing.assert_array_equal(
        db.get_array("select idx,foo from calc"),
        np.array(vals, dtype='S3')[:, :2].astype(float))
    # add_column(), fill with values
    db.add_column('baz', 'TEXT')
    add_header = [('baz', 'TEXT')]
    header += add_header
    assert db.get_header() == header
    db.execute("UPDATE %s SET baz='xx' where idx==0" % db.table)
    db.execute("UPDATE %s SET baz='yy' where idx==1" % db.table)
    db.execute("UPDATE %s SET baz=? where idx==2" % db.table, ('zz',))
    db.commit()
    if have_sqlite3:
        print(common.backtick("sqlite3 %s 'select * from calc'" % dbfn))
    print(db.execute("select baz from calc").fetchall())
    assert db.execute("select baz from calc").fetchall() == \
        [('xx',), ('yy',), ('zz',)]
    # add even more cols with add_columns()
    add_header = [('bob', 'TEXT'), ('alice', 'BLOB')]
    header += add_header
    db.add_columns(add_header)
    assert db.get_header() == header
    # create_table()
    dbfn2 = pj(testdir, 'test2.db')
    header2 = [('a', 'REAL'), ('b', 'TEXT')]
    db2 = SQLiteDB(dbfn2, table='foo')
    db2.create_table(header2)
    assert db2.get_header() == header2
    # get_dict()
    dct = db.get_dict("select foo,bar from calc")
    cols = [x[0] for x in db.get_header()]
    for key in ['foo', 'bar']:
        assert key in cols
    foo = db.get_list1d("select foo from calc")
    bar = db.get_list1d("select bar from calc")
    assert foo == dct['foo']
    assert bar == dct['bar']