def default_repl_keys(): """Return a dict of default keys for replacement in a :class:`ParameterStudy`. Each key will in practice be processed by :class:`FileTemplate`, such that e.g. the key 'foo' becomes the placeholder 'XXXFOO'. Each of these placeholders can be used in any parameter study in any file template, indenpendent from `params_lst` to :class:`ParameterStudy`. Notes ----- If this function is called, dummy files are written to a temp dir, the datsbase is read, and the file are deleted. """ # HACK!!! Due to the way ParameterStudy.write_input() is coded, we really # need to set up a dummy ParameterStudy and read out the database to get # the replacement keys :-) import tempfile calc_root = tempfile.mkdtemp() jobfn_templ = pj(calc_root, 'foo.job') common.file_write(jobfn_templ, 'dummy job template, go away!') m = Machine(hostname='foo', template=FileTemplate(basename='foo.job', templ_dir=calc_root)) print("writing test files to: %s, will be deleted" % calc_root) params_lst = [[SQLEntry(key='dummy', sqlval=1)]] study = ParameterStudy(machines=m, params_lst=params_lst, calc_root=calc_root) study.write_input() db = SQLiteDB(study.dbfn, table='calc') db_keys = [str(x[0]) for x in db.get_header()] for kk in ['dummy', 'hostname']: db_keys.pop(db_keys.index(kk)) db.finish() ret = { 'ParameterStudy': db_keys, 'Machine': list(m.get_sql_record().keys()) } shutil.rmtree(calc_root) return ret
def default_repl_keys(): """Return a dict of default keys for replacement in a :class:`ParameterStudy`. Each key will in practice be processed by :class:`FileTemplate`, such that e.g. the key 'foo' becomes the placeholder 'XXXFOO'. Each of these placeholders can be used in any parameter study in any file template, indenpendent from `params_lst` to :class:`ParameterStudy`. Notes ----- If this function is called, dummy files are written to a temp dir, the datsbase is read, and the file are deleted. """ # HACK!!! Due to the way ParameterStudy.write_input() is coded, we really # need to set up a dummy ParameterStudy and read out the database to get # the replacement keys :-) import tempfile calc_root = tempfile.mkdtemp() jobfn_templ = pj(calc_root, 'foo.job') common.file_write(jobfn_templ, 'dummy job template, go away!') m = Machine(hostname='foo', template=FileTemplate(basename='foo.job', templ_dir=calc_root)) print ("writing test files to: %s, will be deleted" %calc_root) params_lst = [[SQLEntry(key='dummy', sqlval=1)]] study = ParameterStudy(machines=m, params_lst=params_lst, calc_root=calc_root) study.write_input() db = SQLiteDB(study.dbfn, table='calc') db_keys = [str(x[0]) for x in db.get_header()] for kk in ['dummy', 'hostname']: db_keys.pop(db_keys.index(kk)) db.finish() ret = {'ParameterStudy' : db_keys, 'Machine' : m.get_sql_record().keys()} shutil.rmtree(calc_root) return ret
def write_input(self, mode='a', backup=True, sleep=0, excl=True): """ Create calculation dir(s) for each parameter set and write input files based on ``templates``. Write sqlite database storing all relevant parameters. Write (bash) shell script to start all calculations (run locally or submitt batch job file, depending on ``machine.subcmd``). Parameters ---------- mode : str, optional Fine tune how to write input files (based on ``templates``) to calc dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change the base dir calc_foo at all, only the subdirs for each calc. {'a', 'w'} | 'a': Append mode (default). If a previous database is found, then | subsequent calculations are numbered based on the last 'idx'. | calc_foo/0 # old | calc_foo/1 # old | calc_foo/2 # new | calc_foo/3 # new | 'w': Write mode. The target dirs are purged and overwritten. Also, | the database (self.dbfn) is overwritten. Use this to | iteratively tune your inputs, NOT for working on already | present results! | calc_foo/0 # new | calc_foo/1 # new backup : bool, optional Before writing anything, do a backup of self.calc_dir if it already exists. sleep : int, optional For the script to start (submitt) all jobs: time in seconds for the shell sleep(1) commmand. excl : bool If in append mode, a file <calc_root>/excl_push with all indices of calculations from old revisions is written. Can be used with ``rsync --exclude-from=excl_push`` when pushing appended new calculations to a cluster. """ assert mode in ['a', 'w'], "Unknown mode: '%s'" %mode if os.path.exists(self.dbfn): if backup: common.backup(self.dbfn) if mode == 'w': os.remove(self.dbfn) have_new_db = not os.path.exists(self.dbfn) common.makedirs(self.calc_root) # this call creates a file ``self.dbfn`` if it doesn't exist sqldb = SQLiteDB(self.dbfn, table=self.db_table) # max_idx: counter for calc dir numbering revision = 0 if have_new_db: max_idx = -1 else: if mode == 'a': if sqldb.has_column('idx'): max_idx = sqldb.execute("select max(idx) from %s" \ %self.db_table).fetchone()[0] else: raise StandardError("database '%s': table '%s' has no " "column 'idx', don't know how to number calcs" %(self.dbfn, self.db_table)) if sqldb.has_column('revision'): revision = int(sqldb.get_single("select max(revision) \ from %s" %self.db_table)) + 1 elif mode == 'w': max_idx = -1 sql_records = [] hostnames = [] for imach, machine in enumerate(self.machines): hostnames.append(machine.hostname) calc_dir = pj(self.calc_root, self.calc_dir_prefix + \ '_%s' %machine.hostname) if os.path.exists(calc_dir): if backup: common.backup(calc_dir) if mode == 'w': common.system("rm -r %s" %calc_dir, wait=True) run_txt = "here=$(pwd)\n" for _idx, params in enumerate(self.params_lst): params = common.flatten(params) idx = max_idx + _idx + 1 calc_subdir = pj(calc_dir, str(idx)) extra_dct = \ {'revision': revision, 'study_name': self.study_name, 'idx': idx, 'calc_name' : self.study_name + "_run%i" %idx, } extra_params = [SQLEntry(key=key, sqlval=val) for key,val in \ extra_dct.iteritems()] # templates[:] to copy b/c they may be modified in Calculation calc = Calculation(machine=machine, templates=self.templates[:], params=params + extra_params, calc_dir=calc_subdir, ) if mode == 'w' and os.path.exists(calc_subdir): shutil.rmtree(calc_subdir) calc.write_input() run_txt += "cd %i && %s %s && cd $here && sleep %i\n" %(idx,\ machine.subcmd, machine.get_jobfile_basename(), sleep) if imach == 0: sql_records.append(calc.get_sql_record()) common.file_write(pj(calc_dir, 'run.sh'), run_txt) for record in sql_records: record['hostname'] = SQLEntry(sqlval=','.join(hostnames)) # for incomplete parameters: collect header parts from all records and # make a set = unique entries raw_header = [(key, entry.sqltype.upper()) for record in sql_records \ for key, entry in record.iteritems()] header = list(set(raw_header)) if have_new_db: sqldb.create_table(header) else: for record in sql_records: for key, entry in record.iteritems(): if not sqldb.has_column(key): sqldb.add_column(key, entry.sqltype.upper()) for record in sql_records: cmd = "insert into %s (%s) values (%s)"\ %(self.db_table, ",".join(record.keys()), ",".join(['?']*len(record.keys()))) sqldb.execute(cmd, tuple(entry.sqlval for entry in record.itervalues())) if excl and revision > 0 and sqldb.has_column('revision'): old_idx_lst = [str(x) for x, in sqldb.execute("select idx from calc where \ revision < ?", (revision,))] common.file_write(pj(self.calc_root, 'excl_push'), '\n'.join(old_idx_lst)) sqldb.finish()
def write_input(self, mode='a', backup=True, sleep=0, excl=True): """ Create calculation dir(s) for each parameter set and write input files based on ``templates``. Write sqlite database storing all relevant parameters. Write (bash) shell script to start all calculations (run locally or submitt batch job file, depending on ``machine.subcmd``). Parameters ---------- mode : str, optional Fine tune how to write input files (based on ``templates``) to calc dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change the base dir calc_foo at all, only the subdirs for each calc. {'a', 'w'} | 'a': Append mode (default). If a previous database is found, then | subsequent calculations are numbered based on the last 'idx'. | calc_foo/0 # old | calc_foo/1 # old | calc_foo/2 # new | calc_foo/3 # new | 'w': Write mode. The target dirs are purged and overwritten. Also, | the database (self.dbfn) is overwritten. Use this to | iteratively tune your inputs, NOT for working on already | present results! | calc_foo/0 # new | calc_foo/1 # new backup : bool, optional Before writing anything, do a backup of self.calc_dir if it already exists. sleep : int, optional For the script to start (submitt) all jobs: time in seconds for the shell sleep(1) commmand. excl : bool If in append mode, a file <calc_root>/excl_push with all indices of calculations from old revisions is written. Can be used with ``rsync --exclude-from=excl_push`` when pushing appended new calculations to a cluster. """ assert mode in ['a', 'w'], "Unknown mode: '%s'" % mode if os.path.exists(self.dbfn): if backup: common.backup(self.dbfn) if mode == 'w': os.remove(self.dbfn) have_new_db = not os.path.exists(self.dbfn) common.makedirs(self.calc_root) # this call creates a file ``self.dbfn`` if it doesn't exist sqldb = SQLiteDB(self.dbfn, table=self.db_table) # max_idx: counter for calc dir numbering revision = 0 if have_new_db: max_idx = -1 else: if mode == 'a': if sqldb.has_column('idx'): max_idx = sqldb.execute("select max(idx) from %s" \ %self.db_table).fetchone()[0] else: raise Exception( "database '%s': table '%s' has no " "column 'idx', don't know how to number calcs" % (self.dbfn, self.db_table)) if sqldb.has_column('revision'): revision = int( sqldb.get_single("select max(revision) \ from %s" % self.db_table)) + 1 elif mode == 'w': max_idx = -1 sql_records = [] hostnames = [] for imach, machine in enumerate(self.machines): hostnames.append(machine.hostname) calc_dir = pj(self.calc_root, self.calc_dir_prefix + \ '_%s' %machine.hostname) if os.path.exists(calc_dir): if backup: common.backup(calc_dir) if mode == 'w': common.system("rm -r %s" % calc_dir, wait=True) run_txt = "here=$(pwd)\n" for _idx, params in enumerate(self.params_lst): params = common.flatten(params) idx = max_idx + _idx + 1 calc_subdir = pj(calc_dir, str(idx)) extra_dct = \ {'revision': revision, 'study_name': self.study_name, 'idx': idx, 'calc_name' : self.study_name + "_run%i" %idx, } extra_params = [SQLEntry(key=key, sqlval=val) for key,val in \ extra_dct.items()] # templates[:] to copy b/c they may be modified in Calculation calc = Calculation( machine=machine, templates=self.templates[:], params=params + extra_params, calc_dir=calc_subdir, ) if mode == 'w' and os.path.exists(calc_subdir): shutil.rmtree(calc_subdir) calc.write_input() run_txt += "cd %i && %s %s && cd $here && sleep %i\n" %(idx,\ machine.subcmd, machine.get_jobfile_basename(), sleep) if imach == 0: sql_records.append(calc.get_sql_record()) common.file_write(pj(calc_dir, 'run.sh'), run_txt) for record in sql_records: record['hostname'] = SQLEntry(sqlval=','.join(hostnames)) # for incomplete parameters: collect header parts from all records and # make a set = unique entries raw_header = [(key, entry.sqltype.upper()) for record in sql_records \ for key, entry in record.items()] header = list(set(raw_header)) if have_new_db: sqldb.create_table(header) else: for record in sql_records: for key, entry in record.items(): if not sqldb.has_column(key): sqldb.add_column(key, entry.sqltype.upper()) for record in sql_records: cmd = "insert into %s (%s) values (%s)"\ %(self.db_table, ",".join(list(record.keys())), ",".join(['?']*len(list(record.keys())))) sqldb.execute(cmd, tuple(entry.sqlval for entry in record.values())) if excl and revision > 0 and sqldb.has_column('revision'): old_idx_lst = [ str(x) for x, in sqldb.execute( "select idx from calc where \ revision < ?", ( revision, )) ] common.file_write(pj(self.calc_root, 'excl_push'), '\n'.join(old_idx_lst)) sqldb.finish()