Exemplo n.º 1
0
 def write_input(self, mode='a', backup=True, sleep=0, excl=True):
     """
     Create calculation dir(s) for each parameter set and write input files
     based on ``templates``. Write sqlite database storing all relevant
     parameters. Write (bash) shell script to start all calculations (run
     locally or submitt batch job file, depending on ``machine.subcmd``).
 
     Parameters
     ----------
     mode : str, optional
         Fine tune how to write input files (based on ``templates``) to calc
         dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change
         the base dir calc_foo at all, only the subdirs for each calc.
         {'a', 'w'}
         
         | 'a': Append mode (default). If a previous database is found, then
         |     subsequent calculations are numbered based on the last 'idx'.
         |     calc_foo/0 # old
         |     calc_foo/1 # old
         |     calc_foo/2 # new
         |     calc_foo/3 # new
         | 'w': Write mode. The target dirs are purged and overwritten. Also,
         |     the database (self.dbfn) is overwritten. Use this to
         |     iteratively tune your inputs, NOT for working on already
         |     present results!
         |     calc_foo/0 # new
         |     calc_foo/1 # new
     backup : bool, optional
         Before writing anything, do a backup of self.calc_dir if it already
         exists.
     sleep : int, optional
         For the script to start (submitt) all jobs: time in seconds for the
         shell sleep(1) commmand.
     excl : bool
         If in append mode, a file <calc_root>/excl_push with all indices of
         calculations from old revisions is written. Can be used with
         ``rsync --exclude-from=excl_push`` when pushing appended new
         calculations to a cluster.
     """
     assert mode in ['a', 'w'], "Unknown mode: '%s'" %mode
     if os.path.exists(self.dbfn):
         if backup:
             common.backup(self.dbfn)
         if mode == 'w':
             os.remove(self.dbfn)
     have_new_db = not os.path.exists(self.dbfn)
     common.makedirs(self.calc_root)
     # this call creates a file ``self.dbfn`` if it doesn't exist
     sqldb = SQLiteDB(self.dbfn, table=self.db_table)
     # max_idx: counter for calc dir numbering
     revision = 0
     if have_new_db:
         max_idx = -1
     else:
         if mode == 'a':
             if sqldb.has_column('idx'):
                 max_idx = sqldb.execute("select max(idx) from %s" \
                 %self.db_table).fetchone()[0]
             else:
                 raise StandardError("database '%s': table '%s' has no "
                       "column 'idx', don't know how to number calcs"
                       %(self.dbfn, self.db_table))
             if sqldb.has_column('revision'):
                 revision = int(sqldb.get_single("select max(revision) \
                     from %s" %self.db_table)) + 1
         elif mode == 'w':
             max_idx = -1
     sql_records = []
     hostnames = []
     for imach, machine in enumerate(self.machines):
         hostnames.append(machine.hostname)
         calc_dir = pj(self.calc_root, self.calc_dir_prefix + \
                       '_%s' %machine.hostname)
         if os.path.exists(calc_dir):
             if backup:
                 common.backup(calc_dir)
             if mode == 'w':
                 common.system("rm -r %s" %calc_dir, wait=True)
         run_txt = "here=$(pwd)\n"
         for _idx, params in enumerate(self.params_lst):
             params = common.flatten(params)
             idx = max_idx + _idx + 1
             calc_subdir = pj(calc_dir, str(idx))
             extra_dct = \
                 {'revision': revision,
                  'study_name': self.study_name,
                  'idx': idx,
                  'calc_name' : self.study_name + "_run%i" %idx,
                  }
             extra_params = [SQLEntry(key=key, sqlval=val) for key,val in \
                             extra_dct.iteritems()]
             # templates[:] to copy b/c they may be modified in Calculation
             calc = Calculation(machine=machine,
                                templates=self.templates[:], 
                                params=params + extra_params,
                                calc_dir=calc_subdir,
                                )
             if mode == 'w' and os.path.exists(calc_subdir):
                 shutil.rmtree(calc_subdir)
             calc.write_input()                               
             run_txt += "cd %i && %s %s && cd $here && sleep %i\n" %(idx,\
                         machine.subcmd, machine.get_jobfile_basename(), sleep)
             if imach == 0:                            
                 sql_records.append(calc.get_sql_record())
         common.file_write(pj(calc_dir, 'run.sh'), run_txt)
     for record in sql_records:
         record['hostname'] = SQLEntry(sqlval=','.join(hostnames))
     # for incomplete parameters: collect header parts from all records and
     # make a set = unique entries
     raw_header = [(key, entry.sqltype.upper()) for record in sql_records \
         for key, entry in record.iteritems()]
     header = list(set(raw_header))
     if have_new_db:
         sqldb.create_table(header)
     else:
         for record in sql_records:
             for key, entry in record.iteritems():
                 if not sqldb.has_column(key):
                     sqldb.add_column(key, entry.sqltype.upper())
     for record in sql_records:
         cmd = "insert into %s (%s) values (%s)"\
             %(self.db_table,
               ",".join(record.keys()),
               ",".join(['?']*len(record.keys())))
         sqldb.execute(cmd, tuple(entry.sqlval for entry in record.itervalues()))
     if excl and revision > 0 and sqldb.has_column('revision'):
         old_idx_lst = [str(x) for x, in sqldb.execute("select idx from calc where \
                                                       revision < ?", (revision,))]
         common.file_write(pj(self.calc_root, 'excl_push'),
                           '\n'.join(old_idx_lst))
     sqldb.finish()
Exemplo n.º 2
0
 def write_input(self, mode='a', backup=True, sleep=0, excl=True):
     """
     Create calculation dir(s) for each parameter set and write input files
     based on ``templates``. Write sqlite database storing all relevant
     parameters. Write (bash) shell script to start all calculations (run
     locally or submitt batch job file, depending on ``machine.subcmd``).
 
     Parameters
     ----------
     mode : str, optional
         Fine tune how to write input files (based on ``templates``) to calc
         dirs calc_foo/0/, calc_foo/1/, ... . Note that this doesn't change
         the base dir calc_foo at all, only the subdirs for each calc.
         {'a', 'w'}
         
         | 'a': Append mode (default). If a previous database is found, then
         |     subsequent calculations are numbered based on the last 'idx'.
         |     calc_foo/0 # old
         |     calc_foo/1 # old
         |     calc_foo/2 # new
         |     calc_foo/3 # new
         | 'w': Write mode. The target dirs are purged and overwritten. Also,
         |     the database (self.dbfn) is overwritten. Use this to
         |     iteratively tune your inputs, NOT for working on already
         |     present results!
         |     calc_foo/0 # new
         |     calc_foo/1 # new
     backup : bool, optional
         Before writing anything, do a backup of self.calc_dir if it already
         exists.
     sleep : int, optional
         For the script to start (submitt) all jobs: time in seconds for the
         shell sleep(1) commmand.
     excl : bool
         If in append mode, a file <calc_root>/excl_push with all indices of
         calculations from old revisions is written. Can be used with
         ``rsync --exclude-from=excl_push`` when pushing appended new
         calculations to a cluster.
     """
     assert mode in ['a', 'w'], "Unknown mode: '%s'" % mode
     if os.path.exists(self.dbfn):
         if backup:
             common.backup(self.dbfn)
         if mode == 'w':
             os.remove(self.dbfn)
     have_new_db = not os.path.exists(self.dbfn)
     common.makedirs(self.calc_root)
     # this call creates a file ``self.dbfn`` if it doesn't exist
     sqldb = SQLiteDB(self.dbfn, table=self.db_table)
     # max_idx: counter for calc dir numbering
     revision = 0
     if have_new_db:
         max_idx = -1
     else:
         if mode == 'a':
             if sqldb.has_column('idx'):
                 max_idx = sqldb.execute("select max(idx) from %s" \
                 %self.db_table).fetchone()[0]
             else:
                 raise Exception(
                     "database '%s': table '%s' has no "
                     "column 'idx', don't know how to number calcs" %
                     (self.dbfn, self.db_table))
             if sqldb.has_column('revision'):
                 revision = int(
                     sqldb.get_single("select max(revision) \
                     from %s" % self.db_table)) + 1
         elif mode == 'w':
             max_idx = -1
     sql_records = []
     hostnames = []
     for imach, machine in enumerate(self.machines):
         hostnames.append(machine.hostname)
         calc_dir = pj(self.calc_root, self.calc_dir_prefix + \
                       '_%s' %machine.hostname)
         if os.path.exists(calc_dir):
             if backup:
                 common.backup(calc_dir)
             if mode == 'w':
                 common.system("rm -r %s" % calc_dir, wait=True)
         run_txt = "here=$(pwd)\n"
         for _idx, params in enumerate(self.params_lst):
             params = common.flatten(params)
             idx = max_idx + _idx + 1
             calc_subdir = pj(calc_dir, str(idx))
             extra_dct = \
                 {'revision': revision,
                  'study_name': self.study_name,
                  'idx': idx,
                  'calc_name' : self.study_name + "_run%i" %idx,
                  }
             extra_params = [SQLEntry(key=key, sqlval=val) for key,val in \
                             extra_dct.items()]
             # templates[:] to copy b/c they may be modified in Calculation
             calc = Calculation(
                 machine=machine,
                 templates=self.templates[:],
                 params=params + extra_params,
                 calc_dir=calc_subdir,
             )
             if mode == 'w' and os.path.exists(calc_subdir):
                 shutil.rmtree(calc_subdir)
             calc.write_input()
             run_txt += "cd %i && %s %s && cd $here && sleep %i\n" %(idx,\
                         machine.subcmd, machine.get_jobfile_basename(), sleep)
             if imach == 0:
                 sql_records.append(calc.get_sql_record())
         common.file_write(pj(calc_dir, 'run.sh'), run_txt)
     for record in sql_records:
         record['hostname'] = SQLEntry(sqlval=','.join(hostnames))
     # for incomplete parameters: collect header parts from all records and
     # make a set = unique entries
     raw_header = [(key, entry.sqltype.upper()) for record in sql_records \
         for key, entry in record.items()]
     header = list(set(raw_header))
     if have_new_db:
         sqldb.create_table(header)
     else:
         for record in sql_records:
             for key, entry in record.items():
                 if not sqldb.has_column(key):
                     sqldb.add_column(key, entry.sqltype.upper())
     for record in sql_records:
         cmd = "insert into %s (%s) values (%s)"\
             %(self.db_table,
               ",".join(list(record.keys())),
               ",".join(['?']*len(list(record.keys()))))
         sqldb.execute(cmd,
                       tuple(entry.sqlval for entry in record.values()))
     if excl and revision > 0 and sqldb.has_column('revision'):
         old_idx_lst = [
             str(x) for x, in sqldb.execute(
                 "select idx from calc where \
                                                       revision < ?", (
                     revision, ))
         ]
         common.file_write(pj(self.calc_root, 'excl_push'),
                           '\n'.join(old_idx_lst))
     sqldb.finish()
Exemplo n.º 3
0
def test_sql():
    # Check for sqlite3 command line tool. In Python 3.3, we can use
    # shutil.which().
    have_sqlite3 = False
    for pp in sys.path:
        exe = pj(pp, 'sqlite3')
        if os.path.isfile(exe):
            print("found:", exe)
            have_sqlite3 = True
            break

    # --- SQLiteDB ----------------------------------------------------
    dbfn = pj(testdir, 'test.db')
    if os.path.exists(dbfn):
        os.remove(dbfn)

    header = [('idx', 'INTEGER'), ('foo', 'REAL'), ('bar', 'TEXT')]
    db = SQLiteDB(dbfn, table='calc')
    db.execute("CREATE TABLE calc (%s)" %','.join("%s %s" %(x[0], x[1]) \
                                                  for x in header))

    vals = [[0, 1.1, 'a'], [1, 2.2, 'b'], [2, 3.3, 'c']]
    for lst in vals:
        db.execute("INSERT INTO calc (idx, foo, bar) VALUES (?,?,?)",
                   tuple(lst))
    db.commit()

    # get_max_rowid
    assert db.get_max_rowid() == 3

    # has_table
    assert db.has_table('calc')
    assert not db.has_table('foo')

    # has_column
    assert db.has_column('idx')
    assert not db.has_column('grrr')

    # get_single
    assert float(db.get_single("select foo from calc where idx==0")) == 1.1

    assert header == db.get_header()

    if have_sqlite3:
        # call sqlite3, the cmd line interface
        assert common.backtick("sqlite3 %s 'select * from calc'" %dbfn) \
            == '0|1.1|a\n1|2.2|b\n2|3.3|c\n'

    # ret =
    # [(0, 1.1000000000000001, u'a'),
    # (1, 2.2000000000000002, u'b'),
    # (2, 3.2999999999999998, u'c')]
    ret = db.execute("select * from calc").fetchall()
    for idx, lst in enumerate(vals):
        assert list(ret[idx]) == lst

    # generator object, yields
    # tup = (0, 1.1000000000000001, u'a')
    # tup = (1, 2.2000000000000002, u'b')
    # tup = (2, 3.2999999999999998, u'c')
    itr = db.execute("select * from calc")
    for idx, tup in enumerate(itr):
        assert list(tup) == vals[idx]

    # [(0, 1.1000000000000001, u'a')]
    assert db.execute("select * from calc where idx==0").fetchall() == \
        [tuple(vals[0])]
    # (0, 1.1000000000000001, u'a')
    assert db.execute("select * from calc where idx==0").fetchone() == \
        tuple(vals[0])

    assert db.execute("select bar from calc where idx==0").fetchone()[0] == \
        'a'

    # get_list1d(), get_array1d(), get_array()
    assert db.get_list1d("select idx from calc") == [0, 1, 2]
    np.testing.assert_array_equal(db.get_array1d("select idx from calc"),
                                  np.array([0, 1, 2]))
    np.testing.assert_array_equal(db.get_array("select idx from calc"),
                                  np.array([0, 1, 2])[:, None])
    np.testing.assert_array_equal(
        db.get_array("select idx,foo from calc"),
        np.array(vals, dtype='S3')[:, :2].astype(float))

    # add_column(), fill with values
    db.add_column('baz', 'TEXT')
    add_header = [('baz', 'TEXT')]
    header += add_header
    assert db.get_header() == header
    db.execute("UPDATE %s SET baz='xx' where idx==0" % db.table)
    db.execute("UPDATE %s SET baz='yy' where idx==1" % db.table)
    db.execute("UPDATE %s SET baz=? where idx==2" % db.table, ('zz', ))
    db.commit()
    if have_sqlite3:
        print(common.backtick("sqlite3 %s 'select * from calc'" % dbfn))
    print(db.execute("select baz from calc").fetchall())
    assert db.execute("select baz from calc").fetchall() == \
        [('xx',), ('yy',), ('zz',)]

    # add even more cols with add_columns()
    add_header = [('bob', 'TEXT'), ('alice', 'BLOB')]
    header += add_header
    db.add_columns(add_header)
    assert db.get_header() == header

    # create_table()
    dbfn2 = pj(testdir, 'test2.db')
    header2 = [('a', 'REAL'), ('b', 'TEXT')]
    db2 = SQLiteDB(dbfn2, table='foo')
    db2.create_table(header2)
    assert db2.get_header() == header2

    # get_dict()
    dct = db.get_dict("select foo,bar from calc")
    cols = [x[0] for x in db.get_header()]
    for key in ['foo', 'bar']:
        assert key in cols
    foo = db.get_list1d("select foo from calc")
    bar = db.get_list1d("select bar from calc")
    assert foo == dct['foo']
    assert bar == dct['bar']
Exemplo n.º 4
0
def test_sql():
    # Check for sqlite3 command line tool. In Python 3.3, we can use
    # shutil.which().
    have_sqlite3 = False
    for pp in sys.path:
        exe = pj(pp, 'sqlite3')
        if os.path.isfile(exe):
            print "found:", exe
            have_sqlite3 = True
            break
        
    # --- SQLiteDB ----------------------------------------------------
    dbfn = pj(testdir, 'test.db')
    if os.path.exists(dbfn):
        os.remove(dbfn)

    header = [('idx', 'INTEGER'), ('foo', 'REAL'), ('bar', 'TEXT')]
    db = SQLiteDB(dbfn, table='calc')
    db.execute("CREATE TABLE calc (%s)" %','.join("%s %s" %(x[0], x[1]) \
                                                  for x in header)) 

    vals = [[0, 1.1, 'a'],
            [1, 2.2, 'b'],
            [2, 3.3, 'c']]
    for lst in vals:
        db.execute("INSERT INTO calc (idx, foo, bar) VALUES (?,?,?)", tuple(lst))
    db.commit()
    
    # get_max_rowid
    assert db.get_max_rowid() == 3

    # has_table
    assert db.has_table('calc')
    assert not db.has_table('foo')
    
    # has_column
    assert db.has_column('idx')
    assert not db.has_column('grrr')

    # get_single
    assert float(db.get_single("select foo from calc where idx==0")) == 1.1

    assert header == db.get_header()
    
    if have_sqlite3:
        # call sqlite3, the cmd line interface
        assert common.backtick("sqlite3 %s 'select * from calc'" %dbfn) \
            == '0|1.1|a\n1|2.2|b\n2|3.3|c\n'

    # ret = 
    # [(0, 1.1000000000000001, u'a'),
    # (1, 2.2000000000000002, u'b'),
    # (2, 3.2999999999999998, u'c')]
    ret = db.execute("select * from calc").fetchall()
    for idx, lst in enumerate(vals):
        assert list(ret[idx]) == lst

    # generator object, yields
    # tup = (0, 1.1000000000000001, u'a')
    # tup = (1, 2.2000000000000002, u'b')
    # tup = (2, 3.2999999999999998, u'c')
    itr = db.execute("select * from calc")
    for idx, tup in enumerate(itr):
        assert list(tup) == vals[idx]

    # [(0, 1.1000000000000001, u'a')]
    assert db.execute("select * from calc where idx==0").fetchall() == \
        [tuple(vals[0])]
    # (0, 1.1000000000000001, u'a')
    assert db.execute("select * from calc where idx==0").fetchone() == \
        tuple(vals[0])

    assert db.execute("select bar from calc where idx==0").fetchone()[0] == \
        'a'
    
    # get_list1d(), get_array1d(), get_array()
    assert db.get_list1d("select idx from calc") == [0,1,2]
    np.testing.assert_array_equal(db.get_array1d("select idx from calc"),
                                  np.array([0,1,2]))
    np.testing.assert_array_equal(db.get_array("select idx from calc"), 
                                  np.array([0,1,2])[:,None])
    np.testing.assert_array_equal(db.get_array("select idx,foo from calc"), 
                                  np.array(vals, dtype='S3')[:,:2].astype(float))

    # add_column(), fill with values
    db.add_column('baz', 'TEXT')
    add_header = [('baz', 'TEXT')]
    header += add_header
    assert db.get_header() == header
    db.execute("UPDATE %s SET baz='xx' where idx==0" %db.table)
    db.execute("UPDATE %s SET baz='yy' where idx==1" %db.table)
    db.execute("UPDATE %s SET baz=? where idx==2" %db.table, ('zz',))
    db.commit()
    if have_sqlite3:
        print common.backtick("sqlite3 %s 'select * from calc'" %dbfn)
    print db.execute("select baz from calc").fetchall()
    assert db.execute("select baz from calc").fetchall() == \
        [(u'xx',), (u'yy',), (u'zz',)]
    
    # add even more cols with add_columns()
    add_header = [('bob', 'TEXT'), ('alice', 'BLOB')]
    header += add_header
    db.add_columns(add_header)
    assert db.get_header() == header
    
    # create_table()
    dbfn2 = pj(testdir, 'test2.db')
    header2 = [('a', 'REAL'), ('b', 'TEXT')]
    db2 = SQLiteDB(dbfn2, table='foo')
    db2.create_table(header2)
    assert db2.get_header() == header2
    
    # get_dict()
    dct = db.get_dict("select foo,bar from calc")
    cols = [x[0] for x in db.get_header()]
    for key in ['foo', 'bar']:
        assert key in cols
    foo = db.get_list1d("select foo from calc")
    bar = db.get_list1d("select bar from calc")
    assert foo == dct['foo']
    assert bar == dct['bar']