def _write(self, atoms, key_value_pairs, data):
    """Store *atoms* in the JSON database and return the id of the row.

    When *atoms* is an ``AtomsRow``, an existing entry with the same
    ``unique_id`` is overwritten (its id is reused).  Any other object is
    wrapped in a new ``AtomsRow`` stamped with the current time and the
    ``USER`` environment variable, and always gets a fresh id.
    """
    Database._write(self, atoms, key_value_pairs, data)

    # Start from an empty database unless an existing file can be parsed.
    bigdct, ids, nextid = {}, [], 1
    if (isinstance(self.filename, basestring) and
            os.path.isfile(self.filename)):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime = now()
        row.user = os.getenv('USER')
        id = None
    else:
        row = atoms
        wanted = row.unique_id
        # First stored id whose entry carries the same unique_id, else None.
        id = next((known for known in ids
                   if bigdct[known]['unique_id'] == wanted), None)
        mtime = now()

    # Copy the public attributes of the row; private names, the row's
    # key-value-pair names and 'id' are skipped.
    dct = {key: row[key]
           for key in row.__dict__
           if not (key[0] == '_' or key in row._keys or key == 'id')}
    dct['mtime'] = mtime

    # Explicit arguments win over whatever the row already carries.
    kvp = key_value_pairs or row.key_value_pairs
    if kvp:
        dct['key_value_pairs'] = kvp

    data = data or row.get('data')
    if data:
        dct['data'] = data

    constraints = row.get('constraints')
    if constraints:
        dct['constraints'] = constraints

    if id is None:
        id = nextid
        ids.append(id)
        nextid += 1

    bigdct[id] = dct
    self._write_json(bigdct, ids, nextid)
    return id
def update(self, ids, add_keywords=[], **add_key_value_pairs):
    """Add keywords and key-value pairs to the rows with the given ids.

    *ids* may be a single int or an iterable of ids.  Returns a tuple
    ``(m, n)``: the number of keywords appended and the number of
    key-value pairs that were new to their row.
    """
    check(add_keywords, add_key_value_pairs)

    row_ids = [ids] if isinstance(ids, int) else ids
    bigdct, myids, nextid = self._read_json()
    stamp = now()

    keywords_added = 0
    pairs_added = 0
    for row_id in row_ids:
        entry = bigdct[row_id]
        keywords = entry.setdefault('keywords', [])
        pairs = entry.setdefault('key_value_pairs', {})

        if add_keywords:
            for word in add_keywords:
                # A keyword must not clash with an existing key.
                assert word not in pairs
                if word in keywords:
                    continue
                keywords.append(word)
                keywords_added += 1

        if add_key_value_pairs:
            for word in keywords:
                # A new key must not clash with an existing keyword.
                assert word not in add_key_value_pairs
            size_before = len(pairs)
            pairs.update(add_key_value_pairs)
            pairs_added += len(pairs) - size_before

        entry['mtime'] = stamp

    self._write_json(bigdct, myids, nextid)
    return keywords_added, pairs_added
def create_table(
        row,  # AtomsRow
        header,  # List[str]
        keys,  # List[str]
        key_descriptions,  # Dict[str, Tuple[str, str, str]]
        digits=3  # int
):  # -> Dict[str, Any]
    """Build a table-dict for the given keys of a row.

    The special key ``'age'`` is rendered from ``row.ctime``.  For every
    other key the value is looked up with ``row.get``; keys whose value is
    None are left out.  Floats are formatted with *digits* decimals, and a
    unit from *key_descriptions* (third item) is appended when present.
    The row label is the second item of the description, falling back to
    the key itself.
    """
    rows = []
    for key in keys:
        if key == 'age':
            elapsed = float_to_time_string(now() - row.ctime, True)
            rows.append(('Age', elapsed))
            continue

        value = row.get(key)
        if value is None:
            continue

        if isinstance(value, float):
            text = '{:.{}f}'.format(value, digits)
        elif isinstance(value, str):
            text = value
        else:
            text = str(value)

        description = key_descriptions.get(key, ['', key, ''])
        desc, unit = description[1:]
        if unit:
            text += ' ' + unit
        rows.append((desc, text))

    return {'type': 'table', 'header': header, 'rows': rows}
def row2dct(
        row,
        key_descriptions: Dict[str, Tuple[str, str, str]] = {}
) -> Dict[str, Any]:
    """Turn a row into a dict of preformatted strings for display.

    The result always has 'size', 'cell', 'lengths', 'angles', 'formula'
    and 'table'; 'stress', 'dipole', 'data' and 'constraints' are added
    only when the row provides them.  'table' is a list of
    ``(key, description, value)`` tuples.
    """
    from ase.db.core import float_to_time_string, now

    def three_decimals(values):
        # Comma-separated list of numbers with three decimals.
        return ', '.join('{0:.3f}'.format(x) for x in values)

    result = {}

    cell_only = Atoms(cell=row.cell, pbc=row.pbc)
    result['size'] = kptdensity2monkhorstpack(cell_only,
                                              kptdensity=1.8,
                                              even=False)

    result['cell'] = [['{:.3f}'.format(component) for component in vector]
                      for vector in row.cell]
    cellpar = ['{:.3f}'.format(x) for x in cell_to_cellpar(row.cell)]
    result['lengths'], result['angles'] = cellpar[:3], cellpar[3:]

    stress = row.get('stress')
    if stress is not None:
        result['stress'] = three_decimals(stress)

    result['formula'] = Formula(row.formula).format('abc')

    dipole = row.get('dipole')
    if dipole is not None:
        result['dipole'] = three_decimals(dipole)

    data = row.get('data')
    if data:
        result['data'] = ', '.join(data.keys())

    constraints = row.get('constraints')
    if constraints:
        result['constraints'] = ', '.join(constraint.__class__.__name__
                                          for constraint in constraints)

    # Standard keys plus everything described or stored on the row.
    wanted = {'id', 'energy', 'fmax', 'smax', 'mass', 'age'}
    wanted |= set(key_descriptions)
    wanted |= set(row.key_value_pairs)

    table = result['table'] = []
    for key in wanted:
        if key == 'age':
            elapsed = float_to_time_string(now() - row.ctime, True)
            table.append(('ctime', 'Age', elapsed))
            continue

        value = row.get(key)
        if value is None:
            continue

        if isinstance(value, float):
            value = '{:.3f}'.format(value)
        elif not isinstance(value, str):
            value = str(value)

        desc, unit = key_descriptions.get(key, ['', '', ''])[1:]
        if unit:
            value += ' ' + unit
        table.append((key, desc, value))

    return result
def _update(self, id, key_value_pairs, data=None):
    """Rewrite the key-value pairs (and optionally the data) of one row.

    An 'external_tables' entry is popped from *key_value_pairs* and
    written to the corresponding external tables.  When the object has no
    persistent connection, a temporary one is opened, committed and
    closed here.  Returns *id*.
    """
    encode = self.encode
    external = key_value_pairs.pop('external_tables', {})

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    cur.execute('UPDATE systems SET mtime=?, key_value_pairs=? WHERE id=?',
                (mtime, encode(key_value_pairs), id))
    if data:
        if not isinstance(data, (str, bytes)):
            data = encode(data, binary=self.version >= 9)
        cur.execute('UPDATE systems set data=? where id=?', (data, id))

    # Drop the old index rows before inserting the new ones.
    self._delete(cur, [id],
                 ['keys', 'text_key_values', 'number_key_values'])

    text_rows, number_rows = [], []
    for key, value in key_value_pairs.items():
        if not isinstance(value, (numbers.Real, np.bool_)):
            assert isinstance(value, str)
            text_rows.append([key, value, id])
        else:
            number_rows.append([key, float(value), id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_rows)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_rows)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    for tabname, entries in external.items():
        try:
            dtype = self._guess_type(entries)
            entries['id'] = id
            self._create_table_if_not_exists(tabname, dtype, db_con=con)
            self._insert_in_external_table(cur, name=tabname,
                                           entries=entries)
        except ValueError as exc:
            # Close a temporary connection without committing, then
            # re-raise as a ValueError.
            if self.connection is None:
                con.close()
            raise ValueError(exc)

    if self.connection is None:
        con.commit()
        con.close()
    return id
def set_columns(self, columns):
    """Fill ``self.values`` with one entry per requested column.

    'age' is rendered from ``self.dct.ctime``, 'pbc' becomes a string of
    'T'/'F' characters, and every other column is read as an attribute of
    ``self.dct`` (None when missing).
    """
    self.values = []
    for c in columns:
        if c == 'age':
            value = float_to_time_string(now() - self.dct.ctime)
        elif c == 'pbc':
            # Convert each flag to int before indexing: NumPy booleans
            # are not valid sequence indices.
            value = ''.join('FT'[int(p)] for p in self.dct.pbc)
        else:
            value = getattr(self.dct, c, None)
        self.values.append(value)
def set_columns(self, columns):
    """Compute the display value of every column into ``self.values``.

    'age' is derived from ``self.dct.ctime``, 'pbc' is turned into a
    string of 'T'/'F' characters, and any other name is fetched as an
    attribute of ``self.dct`` with None as the fallback.
    """
    collected = self.values = []
    for name in columns:
        if name == 'pbc':
            flags = self.dct.pbc
            collected.append(''.join('FT'[int(flag)] for flag in flags))
        elif name == 'age':
            collected.append(float_to_time_string(now() - self.dct.ctime))
        else:
            collected.append(getattr(self.dct, name, None))
def _write(self, atoms, key_value_pairs, data, id):
    """Write *atoms* to the JSON database under *id* and return the id.

    With ``id=None`` the next free id is allocated; otherwise *id* must
    already exist and its entry is replaced.  Objects that are not
    ``AtomsRow`` instances are wrapped in one and stamped with the
    current time and the ``USER`` environment variable.
    """
    Database._write(self, atoms, key_value_pairs, data)

    # Start from an empty database unless an existing file can be parsed.
    bigdct, ids, nextid = {}, [], 1
    if (isinstance(self.filename, str) and
            os.path.isfile(self.filename)):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    mtime = now()

    row = atoms
    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')

    # Copy the public attributes of the row; private names, the row's
    # key-value-pair names and 'id' are skipped.
    dct = {key: row[key]
           for key in row.__dict__
           if not (key[0] == '_' or key in row._keys or key == 'id')}
    dct['mtime'] = mtime

    if key_value_pairs:
        dct['key_value_pairs'] = key_value_pairs
    if data:
        dct['data'] = data

    constraints = row.get('constraints')
    if constraints:
        dct['constraints'] = constraints

    if id is not None:
        assert id in bigdct
    else:
        id = nextid
        ids.append(id)
        nextid += 1

    bigdct[id] = dct
    self._write_json(bigdct, ids, nextid)
    return id
def _update(self, id, key_value_pairs, data=None):
    """Rewrite the key-value pairs (and optionally the data) of one row.

    An 'external_tables' entry is popped from *key_value_pairs*; the
    tables it names are created if needed before the managed connection
    is entered, and filled inside it.  Returns *id*.
    """
    encode = self.encode
    external = key_value_pairs.pop('external_tables', {})

    for table_name, table_entries in external.items():
        table_dtype = self._guess_type(table_entries)
        self._create_table_if_not_exists(table_name, table_dtype)

    mtime = now()
    with self.managed_connection() as con:
        cur = con.cursor()
        cur.execute(
            'UPDATE systems SET mtime=?, key_value_pairs=? WHERE id=?',
            (mtime, encode(key_value_pairs), id))
        if data:
            if not isinstance(data, (str, bytes)):
                data = encode(data, binary=self.version >= 9)
            cur.execute('UPDATE systems set data=? where id=?', (data, id))

        # Drop the old index rows before inserting the new ones.
        self._delete(cur, [id],
                     ['keys', 'text_key_values', 'number_key_values'])

        text_rows, number_rows = [], []
        for key, value in key_value_pairs.items():
            if not isinstance(value, (numbers.Real, np.bool_)):
                assert isinstance(value, str)
                text_rows.append([key, value, id])
            else:
                number_rows.append([key, float(value), id])

        cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                        text_rows)
        cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                        number_rows)
        cur.executemany('INSERT INTO keys VALUES (?, ?)',
                        [(key, id) for key in key_value_pairs])

        # Insert entries in the valid tables
        for table_name, table_entries in external.items():
            table_entries['id'] = id
            self._insert_in_external_table(cur, name=table_name,
                                           entries=table_entries)

    return id
def _write(self, atoms, keywords, key_value_pairs, data):
    """Store *atoms* in the JSON database and return the id of the entry.

    A dict is treated as an already collected row: its reserved keys
    (except 'id') are copied and an existing entry with the same
    'unique_id' is overwritten.  Anything else goes through
    ``self.collect_data`` and always gets a fresh id.
    """
    Database._write(self, atoms, keywords, key_value_pairs, data)

    # Start from an empty database unless an existing file can be parsed.
    bigdct, ids, nextid = {}, [], 1
    if isinstance(self.filename, str) and os.path.isfile(self.filename):
        try:
            bigdct, ids, nextid = self._read_json()
        except (SyntaxError, ValueError):
            pass

    if not isinstance(atoms, dict):
        dct = self.collect_data(atoms)
        id = None
    else:
        dct = {key: atoms[key]
               for key in reserved_keys
               if key in atoms and key != 'id'}
        wanted = dct['unique_id']
        # First stored id whose entry carries the same unique_id, else None.
        id = next((known for known in ids
                   if bigdct[known]['unique_id'] == wanted), None)
        dct['mtime'] = now()

    # Store the optional parts only when non-empty; stale ones are removed.
    optional = (('keywords', keywords),
                ('key_value_pairs', key_value_pairs),
                ('data', data))
    for name, payload in optional:
        if not payload:
            dct.pop(name, None)
        else:
            dct[name] = payload

    if id is None:
        id = nextid
        ids.append(id)
        nextid += 1

    bigdct[id] = dct
    self._write_json(bigdct, ids, nextid)
    return id
def _write_log(self): """Writes the population to a logfile. The format is:: timestamp: generation(if available): id1,id2,id3...""" if self.logfile is not None: ids = [str(a.info['relax_id']) for a in self.pop] if ids != []: try: gen_nums = [c.info['key_value_pairs']['generation'] for c in self.all_cand] max_gen = max(gen_nums) except KeyError: max_gen = ' ' f = open(self.logfile, 'a') f.write('{time}: {gen}: {pop}\n'.format(time=now(), pop=','.join(ids), gen=max_gen)) f.close()
def _update(self, id, key_value_pairs, data=None):
    """Update key_value_pairs and data for a single row.

    The modification time and the encoded key-value pairs of row *id*
    are rewritten, the encoded *data* is stored when given, and the
    'keys', 'text_key_values' and 'number_key_values' index rows are
    regenerated.  When the object has no persistent connection, a
    temporary one is opened, committed and closed here.  Returns *id*.
    """
    encode = self.encode

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    # Values are passed as bound parameters rather than spliced into the
    # SQL text: an encoded string containing a single quote would
    # otherwise terminate the literal and corrupt (or inject into) the
    # statement.
    cur.execute(
        'UPDATE systems SET mtime=?, key_value_pairs=? WHERE id=?',
        (mtime, encode(key_value_pairs), id))
    if data:
        cur.execute('UPDATE systems set data=? where id=?',
                    (encode(data), id))

    # Drop the old index rows before inserting the new ones.
    self._delete(cur, [id], ['keys', 'text_key_values',
                             'number_key_values'])

    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (numbers.Real, np.bool_)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    if self.connection is None:
        con.commit()
        con.close()
    return id
def atoms2json(structure, additional_information=None):
    """Return *structure* serialized as a JSON string.

    The structure is wrapped in an ASE ``AtomsRow``; the row's attribute
    dict (minus the private bookkeeping entries) is dumped with sorted
    keys.  *additional_information* is stored under 'key_value_pairs';
    an empty dict is used when it is missing or empty.
    """
    import json
    import os
    from ase.db.core import now
    from ase.db.row import AtomsRow
    from ase.io.jsonio import MyEncoder as AseJsonEncoder

    # This mirrors what ASE itself would store in its database.
    row = AtomsRow(structure)
    # The row has a ctime attribute but no mtime, so mtime is added to the
    # dict below.
    timestamp = now()
    row.ctime = timestamp
    row.user = os.getenv("USER")

    payload = dict(row.__dict__)
    # Default entries that should not be stored.
    for private_name in ("_keys", "_data", "_constraints"):
        payload.pop(private_name)

    payload["mtime"] = timestamp
    payload["key_value_pairs"] = additional_information or {}
    return json.dumps(payload, sort_keys=True, cls=AseJsonEncoder)
def _update(self, ids, delete_keys, add_key_value_pairs):
    """Delete and add key-value pairs on the rows with the given ids.

    Returns a tuple ``(m, n)``: the number of key-value pairs that were
    new to their row, and the number of pairs that were removed.
    """
    bigdct, myids, nextid = self._read_json()
    stamp = now()

    added = 0
    removed = 0
    for row_id in ids:
        entry = bigdct[row_id]
        pairs = entry.get('key_value_pairs', {})

        size_at_start = len(pairs)
        for key in delete_keys:
            pairs.pop(key, None)
        size_after_delete = len(pairs)
        removed += size_at_start - size_after_delete

        pairs.update(add_key_value_pairs)
        added += len(pairs) - size_after_delete

        # Only a non-empty mapping is (re)attached to the entry.
        if pairs:
            entry['key_value_pairs'] = pairs
        entry['mtime'] = stamp

    self._write_json(bigdct, myids, nextid)
    return added, removed
def _write(self, atoms, keywords, key_value_pairs, data):
    """Insert or update one row of the 'systems' table and return its id.

    A dict is treated as an already collected row: if a row with the same
    'unique_id' exists it is updated in place, otherwise a new row is
    inserted.  Any other object is passed through ``self.collect_data``
    and always inserted as a new row.  The species, keyword and key-value
    index tables are filled afterwards, and the connection opened here is
    committed and closed before returning.
    """
    Database._write(self, atoms, keywords, key_value_pairs, data)

    con = self._connect()
    self._initialize(con)
    cur = con.cursor()

    # None means "insert a new row"; set below when an existing row matches.
    id = None

    if isinstance(atoms, dict):
        dct = atoms
        unique_id = dct['unique_id']
        cur.execute('SELECT id FROM systems WHERE unique_id=?',
                    (unique_id,))
        rows = cur.fetchall()
        if rows:
            id = rows[0][0]
            # Remove the old index rows; they are re-inserted below.
            # NOTE(review): 'species' is not deleted here although species
            # rows are inserted again further down - confirm whether
            # duplicates are possible on update.
            self._delete(cur, [id], ['keywords', 'text_key_values',
                                     'number_key_values'])
        dct['mtime'] = now()
    else:
        dct = self.collect_data(atoms)

    if 'constraints' in dct:
        constraints = encode(dct['constraints'])
    else:
        constraints = None

    numbers = dct.get('numbers')

    # Values are positional.
    # NOTE(review): presumably the order must match the column order of
    # the 'systems' table - confirm against the schema.
    row = (dct['unique_id'],
           dct['ctime'],
           dct['mtime'],
           dct['user'],
           blob(numbers),
           blob(dct.get('positions')),
           blob(dct.get('cell')),
           # Pack the three periodic-boundary flags into one integer.
           int(np.dot(dct.get('pbc'), [1, 2, 4])),
           blob(dct.get('initial_magmoms')),
           blob(dct.get('initial_charges')),
           blob(dct.get('masses')),
           blob(dct.get('tags')),
           blob(dct.get('momenta')),
           constraints)

    if 'calculator' in dct:
        row += (dct['calculator'],
                encode(dct['calculator_parameters']))
    else:
        row += (None, None)

    magmom = dct.get('magmom')
    if magmom is not None:
        # magmom can be one or three numbers (non-collinear case)
        magmom = np.array(magmom)
    row += (dct.get('energy'),
            dct.get('free_energy'),
            blob(dct.get('forces')),
            blob(dct.get('stress')),
            blob(dct.get('dipole')),
            blob(dct.get('magmoms')),
            blob(magmom),
            blob(dct.get('charges')),
            encode(keywords),
            encode(key_value_pairs),
            encode(data),
            len(numbers))

    if id is None:
        # self.default is placed first, followed by one placeholder per
        # value.
        q = self.default + ', ' + ', '.join('?' * len(row))
        cur.execute('INSERT INTO systems VALUES ({0})'.format(q),
                    row)
    else:
        # Build "name=?" assignments from the first word of each line of
        # init_statements[0], skipping its first two lines.
        q = ', '.join(line.split()[0].lstrip() + '=?'
                      for line in init_statements[0].splitlines()[2:])
        cur.execute('UPDATE systems SET {0} WHERE id=?'.format(q),
                    row + (id,))

    if id is None:
        id = self.get_last_id(cur)

    if len(numbers) > 0:
        # One species row per distinct atomic number with its count.
        count = np.bincount(numbers)
        unique_numbers = count.nonzero()[0]
        species = [(int(Z), int(count[Z]), id) for Z in unique_numbers]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)', species)

    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (float, int)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, (str, unicode))
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keywords VALUES (?, ?)',
                    [(keyword, id) for keyword in keywords])

    # Insert keys in keywords table also so that it is easy to query
    # for the existence of keys:
    cur.executemany('INSERT INTO keywords VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    con.commit()
    con.close()
    return id
def __init__(self, dct, subscript=None):
    """Collect preformatted summary data for the row *dct*.

    Sets ``cell``, ``forces``, ``stress``, ``table``, ``key_value_pairs``,
    ``keywords``, ``dipole``, ``data`` and ``constraints``.  When
    *subscript* (an object with a ``sub`` method, e.g. a compiled regular
    expression) is given, it is used to wrap parts of the formula in
    ``<sub>`` tags.
    """
    self.dct = dct

    self.cell = [['{0:.3f}'.format(component) for component in vector]
                 for vector in dct.cell]

    forces = dict2forces(dct)
    if forces is None:
        fmax = None
        self.forces = None
    else:
        fmax = (forces**2).sum(1).max()**0.5
        natoms = len(forces)
        self.forces = []
        # Show only the first and last five atoms, separated by one
        # placeholder row.
        for index, force in enumerate(forces):
            if index < 5 or index >= natoms - 5:
                formatted = tuple('{0:10.3f}'.format(x) for x in force)
                symbol = chemical_symbols[dct.numbers[index]]
                self.forces.append((index, symbol) + formatted)
            elif index == 5:
                self.forces.append((' ...', '', ' ...', ' ...', ' ...'))

    stress = dct.get('stress')
    if stress is not None:
        stress = ', '.join('{0:.3f}'.format(s) for s in stress)
    self.stress = stress

    if 'masses' in dct:
        mass = dct.masses.sum()
    else:
        mass = atomic_masses[dct.numbers].sum()

    formula = hill(dct.numbers)
    if subscript:
        formula = subscript.sub(r'<sub>\1</sub>', formula)

    age = float_to_time_string(now() - dct.ctime, True)
    candidates = [
        ('id', dct.id),
        ('age', age),
        ('formula', formula),
        ('user', dct.user),
        ('calculator', dct.get('calculator')),
        ('energy [eV]', dct.get('energy')),
        ('fmax [eV/Ang]', fmax),
        ('charge [|e|]', dct.get('charge')),
        ('mass [au]', mass),
        ('unique id', dct.unique_id),
        ('volume [Ang^3]', abs(np.linalg.det(dct.cell))),
    ]
    # Entries without a value are left out of the table.
    self.table = [entry for entry in candidates if entry[1] is not None]

    self.key_value_pairs = (sorted(dct.key_value_pairs.items())
                            if 'key_value_pairs' in dct else None)
    self.keywords = (', '.join(sorted(dct.keywords))
                     if 'keywords' in dct else None)

    dipole = dct.get('dipole')
    if dipole is not None:
        dipole = ', '.join('{0:.3f}'.format(d) for d in dipole)
    self.dipole = dipole

    data = dct.get('data')
    if data:
        data = ', '.join(data.keys())
    self.data = data

    constraints = dct.get('constraints')
    if constraints:
        constraints = ', '.join(d['name'] for d in constraints)
    self.constraints = constraints
def __init__(self, row, meta={}, subscript=None, prefix='', tmpdir='.'):
    """Collect preformatted summary data and a page layout for *row*.

    Sets ``cell``, ``lengths``, ``angles``, ``forces``, ``stress``,
    ``formula``, ``layout``, ``dipole``, ``data`` and ``constraints``.

    ``meta['layout']`` is a sequence of ``(headline, columns)`` pairs;
    each column is a list of blocks.  A block is None, a
    ``(title, keys)`` tuple, a file name ending in '.png' or '.csv', or
    one of the strings 'ATOMS', 'CELL', 'FORCES'.  Keys that end up in no
    block are collected in a final 'Miscellaneous' section.

    NOTE(review): ``meta['layout']`` is indexed unconditionally, so the
    default ``meta={}`` raises KeyError - callers apparently always pass
    a populated dict; confirm.
    """
    self.row = row

    self.cell = [['{:.3f}'.format(a) for a in axis] for axis in row.cell]
    par = ['{:.3f}'.format(x) for x in cell_to_cellpar(row.cell)]
    self.lengths = par[:3]
    self.angles = par[3:]

    forces = row.get('constrained_forces')
    if forces is None:
        fmax = None
        self.forces = None
    else:
        fmax = (forces**2).sum(1).max()**0.5
        N = len(forces)
        self.forces = []
        # Show only the first and last five atoms, separated by one
        # placeholder row.
        for n, f in enumerate(forces):
            if n < 5 or n >= N - 5:
                f = tuple('{0:10.3f}'.format(x) for x in f)
                symbol = chemical_symbols[row.numbers[n]]
                self.forces.append((n, symbol) + f)
            elif n == 5:
                self.forces.append((' ...', '', ' ...', ' ...', ' ...'))

    self.stress = row.get('stress')
    if self.stress is not None:
        self.stress = ', '.join('{0:.3f}'.format(s) for s in self.stress)

    if 'masses' in row:
        mass = row.masses.sum()
    else:
        mass = atomic_masses[row.numbers].sum()

    self.formula = formula_metal(row.numbers)

    if subscript:
        self.formula = subscript.sub(r'<sub>\1</sub>', self.formula)

    age = float_to_time_string(now() - row.ctime, True)

    # Standard entries; those without a value are dropped.
    table = dict((key, value)
                 for key, value in [('id', row.id),
                                    ('age', age),
                                    ('formula', self.formula),
                                    ('user', row.user),
                                    ('calculator', row.get('calculator')),
                                    ('energy', row.get('energy')),
                                    ('fmax', fmax),
                                    ('charge', row.get('charge')),
                                    ('mass', mass),
                                    ('magmom', row.get('magmom')),
                                    ('unique id', row.unique_id),
                                    ('volume', row.get('volume'))]
                 if value is not None)

    # The row's own key-value pairs override the standard entries.
    table.update(row.key_value_pairs)

    for key, value in table.items():
        if isinstance(value, float):
            table[key] = '{:.3f}'.format(value)

    kd = meta.get('key_descriptions', {})

    # Keys not yet placed in any layout block.
    misc = set(table.keys())
    self.layout = []
    for headline, columns in meta['layout']:
        empty = True
        newcolumns = []
        for column in columns:
            newcolumn = []
            for block in column:
                if block is None:
                    pass
                elif isinstance(block, tuple):
                    title, keys = block
                    rows = []
                    for key in keys:
                        value = table.get(key, None)
                        if value is not None:
                            if key in misc:
                                misc.remove(key)
                            desc, unit = kd.get(key, [0, key, ''])[1:]
                            rows.append((desc, value, unit))
                    if rows:
                        block = (title, rows)
                    else:
                        # A key block without any values is dropped
                        # entirely (not even a None placeholder is kept).
                        continue
                elif any(block.endswith(ext) for ext in ['.png', '.csv']):
                    name = op.join(tmpdir, prefix + block)
                    if not op.isfile(name):
                        # Create the missing file on demand.
                        self.create_figures(row, prefix, tmpdir,
                                            meta['functions'])
                    if op.getsize(name) == 0:
                        # Skip empty files:
                        block = None
                    elif block.endswith('.csv'):
                        block = read_csv_table(name)
                else:
                    assert block in ['ATOMS', 'CELL', 'FORCES'], block

                newcolumn.append(block)
                if block is not None:
                    empty = False
            newcolumns.append(newcolumn)

        # Sections consisting only of None blocks are left out.
        if not empty:
            self.layout.append((headline, newcolumns))

    if misc:
        rows = []
        for key in sorted(misc):
            value = table[key]
            desc, unit = kd.get(key, [0, key, ''])[1:]
            rows.append((desc, value, unit))
        self.layout.append(('Miscellaneous', [[('Items', rows)]]))

    self.dipole = row.get('dipole')
    if self.dipole is not None:
        self.dipole = ', '.join('{0:.3f}'.format(d) for d in self.dipole)

    self.data = row.get('data')
    if self.data:
        self.data = ', '.join(self.data.keys())

    self.constraints = row.get('constraints')
    if self.constraints:
        self.constraints = ', '.join(d['name'] for d in self.constraints)
def _write(self, atoms, key_value_pairs, data, id):
    """Insert (``id is None``) or update one row and return its id.

    An 'external_tables' entry is popped from *key_value_pairs*; the
    tables it names are created if needed before the managed connection
    is entered, and filled inside it.  Objects that are not ``AtomsRow``
    instances are wrapped in one and stamped with the current time and
    the ``USER`` environment variable.
    """
    ext_tables = key_value_pairs.pop("external_tables", {})
    Database._write(self, atoms, key_value_pairs, data)

    mtime = now()

    encode = self.encode
    blob = self.blob

    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms
        # Extract the external tables from AtomsRow
        names = self._get_external_table_names()
        for name in names:
            new_table = row.get(name, {})
            if new_table:
                ext_tables[name] = new_table

    # For a new row without explicit pairs or external tables, fall back
    # to the pairs stored on the row itself.
    if not id and not key_value_pairs and not ext_tables:
        key_value_pairs = row.key_value_pairs

    for k, v in ext_tables.items():
        dtype = self._guess_type(v)
        self._create_table_if_not_exists(k, dtype)

    constraints = row._constraints
    if constraints:
        # Only a list is encoded; any other truthy value is stored as is.
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # Values are positional.
    # NOTE(review): presumably the order must match the column order of
    # the 'systems' table - confirm against the schema.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              # Pack the three periodic-boundary flags into one integer.
              int(np.dot(row.pbc, [1, 2, 4])),
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data

    with self.managed_connection() as con:
        if not isinstance(data, (str, bytes)):
            data = encode(data, binary=self.version >= 9)

        values += (row.get('energy'),
                   row.get('free_energy'),
                   blob(row.get('forces')),
                   blob(row.get('stress')),
                   blob(row.get('dipole')),
                   blob(row.get('magmoms')),
                   row.get('magmom'),
                   blob(row.get('charges')),
                   encode(key_value_pairs),
                   data,
                   len(row.numbers),
                   float_if_not_none(row.get('fmax')),
                   float_if_not_none(row.get('smax')),
                   float_if_not_none(row.get('volume')),
                   float(row.mass),
                   float(row.charge))

        cur = con.cursor()
        if id is None:
            # self.default is placed first, followed by one placeholder
            # per value.
            q = self.default + ', ' + ', '.join('?' * len(values))
            cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                        values)
            id = self.get_last_id(cur)
        else:
            # Remove the old index rows; they are re-inserted below.
            self._delete(cur, [id], ['keys', 'text_key_values',
                                     'number_key_values', 'species'])
            # The first column name is skipped in the UPDATE.
            q = ', '.join(name + '=?' for name in self.columnnames[1:])
            cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                        values + (id, ))

        count = row.count_atoms()
        if count:
            species = [(atomic_numbers[symbol], n, id)
                       for symbol, n in count.items()]
            cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                            species)

        text_key_values = []
        number_key_values = []
        for key, value in key_value_pairs.items():
            if isinstance(value, (numbers.Real, np.bool_)):
                number_key_values.append([key, float(value), id])
            else:
                assert isinstance(value, str)
                text_key_values.append([key, value, id])

        cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                        text_key_values)
        cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                        number_key_values)
        cur.executemany('INSERT INTO keys VALUES (?, ?)',
                        [(key, id) for key in key_value_pairs])

        # Insert entries in the valid tables
        for tabname in ext_tables.keys():
            entries = ext_tables[tabname]
            entries['id'] = id
            self._insert_in_external_table(
                cur, name=tabname, entries=ext_tables[tabname])

    return id
def _write(self, atoms, key_value_pairs, data):
    """Insert or update one row of the 'systems' table and return its id.

    An ``AtomsRow`` whose ``unique_id`` already exists in the table is
    updated in place; everything else is inserted as a new row.  Objects
    that are not ``AtomsRow`` instances are wrapped in one and stamped
    with the current time and the ``USER`` environment variable.  When
    the object has no persistent connection, a temporary one is opened,
    committed and closed here.
    """
    Database._write(self, atoms, key_value_pairs, data)

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    # None means "insert a new row"; set below when an existing row matches.
    id = None

    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime = now()
        row.user = os.getenv('USER')
    else:
        row = atoms

        cur.execute('SELECT id FROM systems WHERE unique_id=?',
                    (row.unique_id,))
        results = cur.fetchall()
        if results:
            id = results[0][0]
            # Remove the old index rows; they are re-inserted below.
            # NOTE(review): 'species' is not deleted here although species
            # rows are inserted again further down - confirm whether
            # duplicates are possible on update.
            self._delete(cur, [id], ['keys', 'text_key_values',
                                     'number_key_values'])
        mtime = now()

    constraints = row._constraints
    if constraints:
        # Only a list is encoded; any other truthy value is stored as is.
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # Values are positional.
    # NOTE(review): presumably the order must match the column order of
    # the 'systems' table - confirm against the schema.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              # Pack the three periodic-boundary flags into one integer.
              int(np.dot(row.pbc, [1, 2, 4])),
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        # Parameters that are not already a string are encoded, and the
        # encoded form is written back onto the row.
        if not isinstance(row.calculator_parameters, basestring):
            row.calculator_parameters = encode(row.calculator_parameters)
        values += (row.calculator, row.calculator_parameters)
    else:
        values += (None, None)

    if key_value_pairs is None:
        key_value_pairs = row.key_value_pairs

    if not data:
        data = row._data
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # self.default is placed first, followed by one placeholder per
        # value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({0})'.format(q),
                    values)
    else:
        # Build "name=?" assignments from the first word of each line of
        # init_statements[0], skipping its first two lines.
        q = ', '.join(line.split()[0].lstrip() + '=?'
                      for line in init_statements[0].splitlines()[2:])
        cur.execute('UPDATE systems SET {0} WHERE id=?'.format(q),
                    values + (id,))

    if id is None:
        id = self.get_last_id(cur)

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (float, int)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Only a connection opened by this call is committed and closed here.
    if self.connection is None:
        con.commit()
        con.close()

    return id
def _write(self, atoms, key_value_pairs, data, id):
    """Insert (``id is None``) or update one row and return its id.

    An 'external_tables' entry is popped from *key_value_pairs* and
    written to the corresponding external tables after the main row.
    Objects that are not ``AtomsRow`` instances are wrapped in one and
    stamped with the current time and the ``USER`` environment variable.
    When the object has no persistent connection, a temporary one is
    opened, committed and closed here; on a ValueError from the
    external-table handling it is closed without committing.
    """
    ext_tables = key_value_pairs.pop("external_tables", {})
    Database._write(self, atoms, key_value_pairs, data)
    encode = self.encode

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    blob = self.blob

    # NOTE(review): both lists are re-initialised further down before
    # their first use, so these two assignments look redundant.
    text_key_values = []
    number_key_values = []
    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms

        # Extract the external tables from AtomsRow
        names = self._get_external_table_names(db_con=con)
        for name in names:
            new_table = row.get(name, {})
            if new_table:
                ext_tables[name] = new_table

    if id:
        # Remove the old index rows; they are re-inserted below.
        self._delete(cur, [id], ['keys', 'text_key_values',
                                 'number_key_values', 'species'])
    else:
        # For a new row without explicit pairs, use those on the row.
        if not key_value_pairs:
            key_value_pairs = row.key_value_pairs

    constraints = row._constraints
    if constraints:
        # Only a list is encoded; any other truthy value is stored as is.
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # Values are positional.
    # NOTE(review): presumably the order must match the column order of
    # the 'systems' table - confirm against the schema.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              # Pack the three periodic-boundary flags into one integer.
              int(np.dot(row.pbc, [1, 2, 4])),
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # self.default is placed first, followed by one placeholder per
        # value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                    values)
        id = self.get_last_id(cur)
    else:
        # The first column name is skipped in the UPDATE.
        q = ', '.join(name + '=?' for name in self.columnnames[1:])
        cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                    values + (id, ))

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (numbers.Real, np.bool_)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Update external tables
    valid_entries = []
    for k, v in ext_tables.items():
        try:
            # Guess the type of the value
            dtype = self._guess_type(v)
            self._create_table_if_not_exists(k, dtype, db_con=con)
            v["id"] = id
            valid_entries.append(k)
        except ValueError as exc:
            # Close the connection without committing
            if self.connection is None:
                con.close()
            # Raise error again
            raise ValueError(exc)

    # Insert entries in the valid tables
    for tabname in valid_entries:
        try:
            self._insert_in_external_table(
                cur, name=tabname, entries=ext_tables[tabname])
        except ValueError as exc:
            # Close the connection without committing
            if self.connection is None:
                con.close()
            # Raise the error again
            raise ValueError(exc)

    # Only a connection opened by this call is committed and closed here.
    if self.connection is None:
        con.commit()
        con.close()

    return id
def age(self, d):
    """Return the time elapsed since ``d.ctime`` as a formatted string."""
    elapsed = now() - d.ctime
    return float_to_time_string(elapsed)
def _write(self, atoms, key_value_pairs, data, id):
    """Insert (``id is None``) or update one row and return its id.

    Objects that are not ``AtomsRow`` instances are wrapped in one and
    stamped with the current time and the ``USER`` environment variable.
    When the object has no persistent connection, a temporary one is
    opened, committed and closed here.
    """
    Database._write(self, atoms, key_value_pairs, data)
    encode = self.encode

    con = self.connection or self._connect()
    self._initialize(con)
    cur = con.cursor()

    mtime = now()

    blob = self.blob

    # NOTE(review): both lists are re-initialised further down before
    # their first use, so these two assignments look redundant.
    text_key_values = []
    number_key_values = []
    if not isinstance(atoms, AtomsRow):
        row = AtomsRow(atoms)
        row.ctime = mtime
        row.user = os.getenv('USER')
    else:
        row = atoms

    if id:
        # Remove the old index rows; they are re-inserted below.
        self._delete(cur, [id], ['keys', 'text_key_values',
                                 'number_key_values', 'species'])
    else:
        # For a new row without explicit pairs, use those on the row.
        if not key_value_pairs:
            key_value_pairs = row.key_value_pairs

    constraints = row._constraints
    if constraints:
        # Only a list is encoded; any other truthy value is stored as is.
        if isinstance(constraints, list):
            constraints = encode(constraints)
    else:
        constraints = None

    # Values are positional.
    # NOTE(review): presumably the order must match the column order of
    # the 'systems' table - confirm against the schema.
    values = (row.unique_id,
              row.ctime,
              mtime,
              row.user,
              blob(row.numbers),
              blob(row.positions),
              blob(row.cell),
              # Pack the three periodic-boundary flags into one integer.
              int(np.dot(row.pbc, [1, 2, 4])),
              blob(row.get('initial_magmoms')),
              blob(row.get('initial_charges')),
              blob(row.get('masses')),
              blob(row.get('tags')),
              blob(row.get('momenta')),
              constraints)

    if 'calculator' in row:
        values += (row.calculator, encode(row.calculator_parameters))
    else:
        values += (None, None)

    if not data:
        data = row._data
    if not isinstance(data, basestring):
        data = encode(data)

    values += (row.get('energy'),
               row.get('free_energy'),
               blob(row.get('forces')),
               blob(row.get('stress')),
               blob(row.get('dipole')),
               blob(row.get('magmoms')),
               row.get('magmom'),
               blob(row.get('charges')),
               encode(key_value_pairs),
               data,
               len(row.numbers),
               float_if_not_none(row.get('fmax')),
               float_if_not_none(row.get('smax')),
               float_if_not_none(row.get('volume')),
               float(row.mass),
               float(row.charge))

    if id is None:
        # self.default is placed first, followed by one placeholder per
        # value.
        q = self.default + ', ' + ', '.join('?' * len(values))
        cur.execute('INSERT INTO systems VALUES ({})'.format(q),
                    values)
        id = self.get_last_id(cur)
    else:
        # The first column name is skipped in the UPDATE.
        q = ', '.join(name + '=?' for name in self.columnnames[1:])
        cur.execute('UPDATE systems SET {} WHERE id=?'.format(q),
                    values + (id,))

    count = row.count_atoms()
    if count:
        species = [(atomic_numbers[symbol], n, id)
                   for symbol, n in count.items()]
        cur.executemany('INSERT INTO species VALUES (?, ?, ?)',
                        species)

    text_key_values = []
    number_key_values = []
    for key, value in key_value_pairs.items():
        if isinstance(value, (numbers.Real, np.bool_)):
            number_key_values.append([key, float(value), id])
        else:
            assert isinstance(value, basestring)
            text_key_values.append([key, value, id])

    cur.executemany('INSERT INTO text_key_values VALUES (?, ?, ?)',
                    text_key_values)
    cur.executemany('INSERT INTO number_key_values VALUES (?, ?, ?)',
                    number_key_values)
    cur.executemany('INSERT INTO keys VALUES (?, ?)',
                    [(key, id) for key in key_value_pairs])

    # Only a connection opened by this call is committed and closed here.
    if self.connection is None:
        con.commit()
        con.close()

    return id
def release(self, pub_ids=None, email=None, from_schema='upload', to_schema='public'):
    """Transfer dataset from one schema to another.

    For every publication the matching rows of the ``systems``,
    ``publication``, ``publication_system``, ``reaction`` and
    ``reaction_system`` tables are copied from ``from_schema`` to
    ``to_schema``.  Reactions get new ids in the target schema and the
    copied ``reaction_system`` rows are rewritten to point at them.

    Parameters
    ----------
    pub_ids: list of str, optional
        Publication ids to transfer.
    email: str, optional
        Alternative to ``pub_ids``: transfer every publication that has a
        reaction in ``from_schema`` whose ``username`` equals this value.
    from_schema, to_schema: str
        Source and target schema names.  They are interpolated into the
        SQL text as identifiers (identifiers cannot be bound as query
        parameters), so they must come from trusted code, never from
        user input.

    Raises
    ------
    AssertionError
        If neither or both of ``pub_ids`` and ``email`` are given, or if
        ``self.user`` is not one of 'release', 'catroot' or 'postgres'.

    Notes
    -----
    All values (``email``, ``pub_id``, ``mtime``) are passed as bound
    ``%s`` parameters instead of being formatted into the SQL text.
    If no shared connection exists (``self.connection`` is None), the
    connection opened here is committed on success and always closed.
    """
    assert pub_ids or email, \
        "Specify either pub_ids or email"
    assert not (pub_ids and email), \
        "Specify either pub_ids or email"

    con = self.connection or self._connect()
    try:
        cur = con.cursor()
        assert self.user in ['release', 'catroot', 'postgres'], \
            "You don't have permission to perform this operation"

        if email:
            cur.execute(
                """SELECT distinct pub_id
                FROM {from_schema}.reaction
                WHERE username = %s"""
                .format(from_schema=from_schema),
                (email,))
            pub_ids = [record[0] for record in cur.fetchall()]

        for pub_id in pub_ids:
            self.stdout.write(
                """Releasing publication {pub_id} from {from_schema} to {schema} \n"""
                .format(pub_id=pub_id, from_schema=from_schema,
                        schema=to_schema))

            # Stamp the systems with the release time before copying them.
            mtime = now()
            cur.execute(
                """UPDATE {from_schema}.systems
                SET mtime = %s
                WHERE unique_id in
                  (SELECT distinct ase_id
                   FROM {from_schema}.publication_system
                   WHERE pub_id = %s)"""
                .format(from_schema=from_schema),
                (mtime, pub_id))

            columns = get_key_str('systems', start_index=1)
            cur.execute(
                """INSERT INTO {schema}.systems ({columns})
                SELECT {columns}
                FROM {from_schema}.systems
                WHERE unique_id in
                  (SELECT distinct ase_id
                   FROM {from_schema}.publication_system
                   WHERE pub_id = %s)"""
                .format(from_schema=from_schema, schema=to_schema,
                        columns=columns),
                (pub_id,))

            # start_index=1 leaves out the first column so that the target
            # table assigns a new id.
            columns = get_key_str('publication', start_index=1)
            cur.execute(
                """INSERT INTO {schema}.publication ({columns})
                SELECT {columns}
                FROM {from_schema}.publication
                WHERE pub_id = %s"""
                .format(from_schema=from_schema, schema=to_schema,
                        columns=columns),
                (pub_id,))
            cur.execute(
                """UPDATE {schema}.publication
                SET stime = %s
                WHERE pub_id = %s"""
                .format(schema=to_schema),
                (mtime, pub_id))

            cur.execute(
                """INSERT INTO {schema}.publication_system
                SELECT *
                FROM {from_schema}.publication_system
                WHERE pub_id = %s"""
                .format(from_schema=from_schema, schema=to_schema),
                (pub_id,))

            columns = get_key_str('reaction', start_index=1)  # new id
            cur.execute(
                """INSERT INTO {schema}.reaction ({columns})
                SELECT {columns}
                FROM {from_schema}.reaction
                WHERE pub_id = %s
                ORDER BY {from_schema}.reaction.id
                RETURNING id"""
                .format(from_schema=from_schema, schema=to_schema,
                        columns=columns),
                (pub_id,))
            new_ids = [record[0] for record in cur.fetchall()]

            cur.execute(
                """SELECT * from {from_schema}.reaction_system
                WHERE ase_id in
                  (SELECT distinct ase_id
                   FROM {from_schema}.publication_system
                   WHERE pub_id = %s)
                ORDER BY id"""
                .format(from_schema=from_schema),
                (pub_id,))
            old_rows = cur.fetchall()

            # Column 3 holds the old reaction id.  Rows arrive ordered by
            # it, and new_ids is ordered the same way, so every change of
            # the old id advances to the next new id.
            reaction_system_values = []
            previous_id = None
            index = -1
            for record in old_rows:
                record = list(record)
                if index < 0 or record[3] != previous_id:
                    index += 1
                    previous_id = record[3]
                record[3] = new_ids[index]
                reaction_system_values.append(tuple(record))

            # A publication without reaction_system rows has nothing to
            # insert (and used to fail with IndexError).
            if reaction_system_values:
                key_str = get_key_str('reaction_system')
                insert_command = """
                INSERT INTO {schema}.reaction_system ({key_str})
                VALUES %s ON CONFLICT DO NOTHING;"""\
                    .format(schema=to_schema, key_str=key_str)
                execute_values(cur=cur, sql=insert_command,
                               argslist=reaction_system_values,
                               page_size=1000)
            self.stdout.write('Transfer complete\n')

        # NOTE: the released publications are not deleted from the source
        # schema here; that step was only ever sketched (for user
        # 'catroot') and is left to the caller.

        if self.connection is None:
            con.commit()
    finally:
        # Only close a connection that was opened by this call.
        if self.connection is None:
            con.close()

    return
def __init__(self, dct, subscript=None):
    """Collect display-ready summary fields from a database row.

    ``dct`` is the row object; it is read through attribute access,
    ``get(name)`` and ``in`` tests.  ``subscript``, when given, must
    offer ``sub(repl, string)`` (e.g. a compiled regular expression) and
    is used to wrap parts of the chemical formula in ``<sub>`` tags.

    Sets ``cell``, ``forces``, ``stress``, ``table``, ``key_value_pairs``,
    ``keywords``, ``dipole``, ``data`` and ``constraints`` on the instance,
    most of them as preformatted strings or lists of strings.
    """
    three_decimals = '{0:.3f}'.format

    self.dct = dct
    self.cell = [[three_decimals(component) for component in vector]
                 for vector in dct.cell]

    # Force table: only the first and last five atoms are listed, with a
    # single placeholder row standing in for everything in between.
    force_array = dict2forces(dct)
    if force_array is not None:
        fmax = (force_array**2).sum(1).max()**0.5
        natoms = len(force_array)
        self.forces = []
        for index, force in enumerate(force_array):
            if not (index < 5 or index >= natoms - 5):
                if index == 5:
                    self.forces.append((' ...', '', ' ...', ' ...', ' ...'))
                continue
            components = tuple('{0:10.3f}'.format(x) for x in force)
            symbol = chemical_symbols[dct.numbers[index]]
            self.forces.append((index, symbol) + components)
    else:
        fmax = None
        self.forces = None

    stress = dct.get('stress')
    self.stress = (None if stress is None
                   else ', '.join(three_decimals(s) for s in stress))

    # Explicit masses take precedence over the tabulated atomic masses.
    mass = (dct.masses.sum() if 'masses' in dct
            else atomic_masses[dct.numbers].sum())

    formula = hill(dct.numbers)
    if subscript:
        formula = subscript.sub(r'<sub>\1</sub>', formula)

    entries = [
        ('id', dct.id),
        ('age', float_to_time_string(now() - dct.ctime, True)),
        ('formula', formula),
        ('user', dct.user),
        ('calculator', dct.get('calculator')),
        ('energy [eV]', dct.get('energy')),
        ('fmax [eV/Ang]', fmax),
        ('charge [|e|]', dct.get('charge')),
        ('mass [au]', mass),
        ('unique id', dct.unique_id),
        ('volume [Ang^3]', abs(np.linalg.det(dct.cell))),
    ]
    # Entries without a value are left out of the table.
    self.table = [(label, value) for label, value in entries
                  if value is not None]

    self.key_value_pairs = (sorted(dct.key_value_pairs.items())
                            if 'key_value_pairs' in dct else None)
    self.keywords = (', '.join(sorted(dct.keywords))
                     if 'keywords' in dct else None)

    dipole = dct.get('dipole')
    self.dipole = (None if dipole is None
                   else ', '.join(three_decimals(d) for d in dipole))

    # For data and constraints only the names are shown; a falsy value is
    # stored unchanged.
    data = dct.get('data')
    self.data = ', '.join(data.keys()) if data else data

    constraints = dct.get('constraints')
    self.constraints = (', '.join(c['name'] for c in constraints)
                        if constraints else constraints)
def __init__(self, row, subscript=None):
    """Build the display summary for a database row.

    Parameters
    ----------
    row:
        Row object, read through attribute access (``cell``, ``numbers``,
        ``id``, ``ctime``, ``user``, ``unique_id``, ``key_value_pairs``
        and, if present, ``masses``), ``get(name)`` and ``in`` tests.
    subscript:
        Optional object with a ``sub(repl, string)`` method (e.g. a
        compiled regular expression) used to wrap parts of the chemical
        formula in ``<sub>`` tags.

    Attributes set
    --------------
    cell, forces, stress, dipole, data, constraints:
        Preformatted strings (or lists/tuples of strings); ``None`` or
        the original falsy value when the row has no such entry.
    table:
        ``(name, unit, value)`` tuples for all entries whose value is not
        None.
    key_value_pairs:
        Sorted ``(key, value)`` list, or None when there are none.
    plots:
        Names of the ``data`` entries that are dicts with an ``"xlabel"``
        key, ordered by their ``"number"`` entry and then by name.
        Entries without a number come first.
    """
    self.row = row
    self.cell = [["{0:.3f}".format(a) for a in axis] for axis in row.cell]

    # Force table: only the first and last five atoms are listed, with a
    # single placeholder row standing in for everything in between.
    forces = row.get("constrained_forces")
    if forces is None:
        fmax = None
        self.forces = None
    else:
        fmax = (forces ** 2).sum(1).max() ** 0.5
        N = len(forces)
        self.forces = []
        for n, f in enumerate(forces):
            if n < 5 or n >= N - 5:
                f = tuple("{0:10.3f}".format(x) for x in f)
                symbol = chemical_symbols[row.numbers[n]]
                self.forces.append((n, symbol) + f)
            elif n == 5:
                self.forces.append((" ...", "", " ...", " ...", " ..."))

    self.stress = row.get("stress")
    if self.stress is not None:
        self.stress = ", ".join("{0:.3f}".format(s) for s in self.stress)

    # Explicit masses take precedence over the tabulated atomic masses.
    if "masses" in row:
        mass = row.masses.sum()
    else:
        mass = atomic_masses[row.numbers].sum()

    formula = hill(row.numbers)
    if subscript:
        formula = subscript.sub(r"<sub>\1</sub>", formula)

    table = [
        ("id", "", row.id),
        ("age", "", float_to_time_string(now() - row.ctime, True)),
        ("formula", "", formula),
        ("user", "", row.user),
        ("calculator", "", row.get("calculator")),
        ("energy", "eV", row.get("energy")),
        ("fmax", "eV/Ang", fmax),
        ("charge", "|e|", row.get("charge")),
        ("mass", "au", mass),
        ("magnetic moment", "au", row.get("magmom")),
        ("unique id", "", row.unique_id),
        ("volume", "Ang^3", row.get("volume")),
    ]
    # Entries without a value are left out of the table.
    self.table = [(name, unit, value)
                  for name, unit, value in table
                  if value is not None]

    self.key_value_pairs = sorted(row.key_value_pairs.items()) or None

    self.dipole = row.get("dipole")
    if self.dipole is not None:
        self.dipole = ", ".join("{0:.3f}".format(d) for d in self.dipole)

    self.plots = []
    self.data = row.get("data")
    if self.data:
        plots = []
        for name, value in self.data.items():
            if isinstance(value, dict) and "xlabel" in value:
                plots.append((value.get("number"), name))

        def plot_order(item):
            # A plain sort of (number, name) tuples raises TypeError on
            # Python 3 as soon as numbered and unnumbered plots are mixed
            # (None cannot be compared with a number).  Sort unnumbered
            # plots first, which is also where Python 2 put None.
            number, name = item
            return (number is not None,
                    0 if number is None else number,
                    name)

        self.plots = [name for number, name in sorted(plots, key=plot_order)]
        self.data = ", ".join(self.data.keys())

    self.constraints = row.get("constraints")
    if self.constraints:
        self.constraints = ", ".join(d["name"] for d in self.constraints)