import random
import warnings

import ase.db

# Note: `ELEMENTS` (a mapping from atomic number to element symbol) is assumed
# to be defined elsewhere in the source module.


def choose_elements(bulk_database, n):
    '''
    Chooses `n` elements at random from the set of elements inside the given
    database.

    Args:
        bulk_database   A string pointing to the ASE *.db object that contains
                        the bulks you want to consider.
        n               A positive integer indicating how many elements you
                        want to choose.
    Returns:
        elements    A list of strings indicating the chosen elements
    '''
    db = ase.db.connect(bulk_database)
    all_elements = {
        ELEMENTS[number]
        for row in db.select() for number in row.numbers
    }
    # random.sample() requires a sequence (sampling a set raises TypeError on
    # Python >= 3.11), so sort the set first.
    elements = random.sample(sorted(all_elements), n)

    # Make sure we choose a combination of elements that exists in our bulk
    # database
    while db.count(elements) == 0:
        warnings.warn(
            'Sampled the elements %s, but could not find any matching '
            'bulks in the database (%s). Trying to re-sample' %
            (elements, bulk_database), RuntimeWarning)
        elements = random.sample(sorted(all_elements), n)

    return elements
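
A minimal usage sketch (the database path and n=2 are illustrative, not part of the original snippet):

if __name__ == '__main__':
    # Assumes an ASE database of bulk structures exists at this path.
    chosen = choose_elements('bulks.db', n=2)
    print('Sampled elements:', chosen)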
Example #2
File: db.py Project: btodac/ase
import ase.db
# `string2index` has lived in ase.io.formats (ase.utils in newer releases).
from ase.io.formats import string2index


def read_db(filename, index, **kwargs):
    db = ase.db.connect(filename, serial=True, **kwargs)

    if isinstance(index, str):  # Python 3: `basestring` replaced by `str`
        try:
            index = string2index(index)
        except ValueError:
            pass

    if isinstance(index, int):
        index = slice(index, index + 1 or None)

    if isinstance(index, str):
        # index is a database query string:
        for row in db.select(index):
            yield row.toatoms()
    else:
        start, stop, step = index.indices(db.count())
        if start == stop:
            return
        assert step == 1
        for row in db.select(offset=start, limit=stop - start):
            yield row.toatoms()
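
read_db treats index polymorphically: integers and slice strings select rows by position, while any other string is handed to db.select() as an ASE query. A hedged sketch of the three call styles (the filename is illustrative):

first = list(read_db('structures.db', 0))        # single row by position
middle = list(read_db('structures.db', '2:5'))   # positional slice
copper = list(read_db('structures.db', 'Cu>0'))  # ASE query string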
Example #4
import random

import ase.db


def choose_adsorbate(adsorbate_database):
    '''
    Chooses an adsorbate from our database at random.

    Args:
        adsorbate_database  A string pointing to the ASE *.db object that
                            contains the adsorbates you want to consider.
    Returns:
        atoms           `ase.Atoms` object of the adsorbate
        smiles          SMILES-formatted representation of the adsorbate
        bond_indices    list of integers indicating the indices of the atoms in
                        the adsorbate that are meant to be bonded to the surface
    '''
    db = ase.db.connect(adsorbate_database)
    ads_idx = random.choice(list(range(db.count())))
    row = db.get(ads_idx + 1)  # ase.db's don't 0-index

    atoms = row.toatoms()
    data = row.data
    smiles = data['SMILE']
    bond_indices = data['bond_idx']
    return atoms, smiles, bond_indices
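
A usage sketch (the database path is illustrative):

atoms, smiles, bond_indices = choose_adsorbate('adsorbates.db')
print('Chose %s, binding through atom indices %s' % (smiles, bond_indices))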
Example #5

            # mol = list(db.select(formula=name, prototype=proto))[0]
            # thick.append(get_thick(mol))

alpha_x = numpy.array(alpha_x)
alpha_z = numpy.array(alpha_z)
eps_x_3D = numpy.array(eps_x_3D)
eps_z_3D = numpy.array(eps_z_3D)
Eg_HSE = numpy.array(Eg_HSE)
# thick = numpy.array(thick)

eps_x_gpaw = []
eps_z_gpaw = []
alpha_z_gpaw = []
Eg_gpaw = []
L_gpaw = []
for db_id in range(1, db.count() + 1):  # db index starts with 1
    mol = db.get(db_id)
    # if any(hasattr(mol, key) is False
    #        for key in ["alphax", "alphay", "alphaz", "bulk_L",
    #                    "bulk_eps_x", "bulk_eps_y", "bulk_eps_z"]):
    #     continue
    # if mol.bulk_calculated is False:
    #     continue
    try:
        ax = (mol.alphax + mol.alphay) / 2
        az = mol.alphaz
        L, ex, ez, e = get_bulk(None, None, db_id, method="gpaw")
        ex_simu = 1 + 4 * pi * ax / L
        ez_simu = 1 / (1 - 4 * pi * az / L)
        # ez_simu = 4 * pi * az / L
        eps_x_gpaw.append((ex, ex_simu))
    except AttributeError:
        # The snippet is truncated here; the original presumably skipped rows
        # that lack the polarizability attributes.
        continue
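
The loop compares GPAW bulk dielectric constants with the slab-polarizability model used in the snippet itself: eps_x ~ 1 + 4*pi*alpha_x/L in-plane and eps_z ~ 1/(1 - 4*pi*alpha_z/L) out-of-plane. A quick numeric check of those two formulas (the values are illustrative, not from the database):

from math import pi

alpha_x, alpha_z, L = 5.9, 0.25, 6.5   # hypothetical polarizabilities / layer spacing
eps_x_model = 1 + 4 * pi * alpha_x / L
eps_z_model = 1 / (1 - 4 * pi * alpha_z / L)
print(eps_x_model, eps_z_model)        # roughly 12.4 and 1.9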
Example #6
def index(project):
    global next_con_id

    # Backwards compatibility:
    project = request.args.get('project') or project

    if not projects:
        # First time: initialize list of projects
        for proj, db in sorted(databases.items()):
            meta = ase.db.web.process_metadata(db)
            db.meta = meta
            nrows = len(db)
            projects.append((proj, db.meta.get('title', proj), nrows))
            print('Initialized {proj}: {nrows} rows'.format(proj=proj,
                                                            nrows=nrows))

    if project is None and len(projects) > 1:
        return render_template('projects.html',
                               projects=projects,
                               home=home,
                               md=None,
                               ase_db_footer=ase_db_footer)

    if project is None:
        project = list(databases)[0]

    con_id = int(request.args.get('x', '0'))
    if con_id in connections:
        query, nrows, page, columns, sort, limit = connections[con_id]

    if con_id not in connections:
        # Give this connection a new id:
        con_id = next_con_id
        next_con_id += 1
        query = ['', {}, '']
        nrows = None
        page = 0
        columns = None
        sort = 'id'
        limit = 25

    db = databases.get(project)
    if db is None:
        return 'No such project: ' + project

    meta = db.meta

    if columns is None:
        # Guard against a missing 'default_columns' entry (get() returns None).
        columns = list(meta.get('default_columns') or all_columns)

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        dct = {}
        query = [request.args['query']]
        q = query[0]
        for special in meta['special_keys']:
            kind, key = special[:2]
            if kind == 'SELECT':
                value = request.args['select_' + key]
                dct[key] = convert_str_to_int_float_or_str(value)
                if value:
                    q += ',{}={}'.format(key, value)
            elif kind == 'BOOL':
                value = request.args['bool_' + key]
                dct[key] = convert_str_to_int_float_or_str(value)
                if value:
                    q += ',{}={}'.format(key, value)
            else:
                v1 = request.args['from_' + key]
                v2 = request.args['to_' + key]
                var = request.args['range_' + key]
                dct[key] = (v1, v2, var)
                if v1 or v2:
                    var = request.args['range_' + key]
                    if v1:
                        q += ',{}>={}'.format(var, v1)
                    if v2:
                        q += ',{}<={}'.format(var, v2)
        q = q.lstrip(',')
        query += [dct, q]
        sort = 'id'
        page = 0
        nrows = None
    elif 'limit' in request.args:
        limit = int(request.args['limit'])
        page = 0
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        column = request.args['toggle']
        if column == 'reset':
            columns = list(meta.get('default_columns') or all_columns)
        else:
            if column in columns:
                columns.remove(column)
                if column == sort.lstrip('-'):
                    sort = 'id'
                    page = 0
            else:
                columns.append(column)

    okquery = query

    if nrows is None:
        try:
            nrows = db.count(query[2])
        except (ValueError, KeyError) as e:
            flash(', '.join(['Bad query'] + list(e.args)))
            okquery = ('', {}, 'id=0')  # this will return no rows
            nrows = 0

    table = Table(db, meta.get('unique_key', 'id'))
    table.select(okquery[2], columns, sort, limit, offset=page * limit)

    con = Connection(query, nrows, page, columns, sort, limit)
    connections[con_id] = con

    if len(connections) > 1000:
        # Forget old connections:
        for cid in sorted(connections)[:200]:
            del connections[cid]

    table.format(SUBSCRIPT)

    addcolumns = [
        column for column in all_columns + table.keys
        if column not in table.columns
    ]

    return render_template('table.html',
                           project=project,
                           t=table,
                           md=meta,
                           con=con,
                           x=con_id,
                           home=home,
                           ase_db_footer=ase_db_footer,
                           pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows),
                           download_button=download_button)
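
The 'query' branch above assembles an ASE selection string by appending one clause per special key, comma-separated (ASE treats commas as AND). A standalone sketch of that assembly, outside Flask (the function name and keys are made up for illustration):

def build_query(base, select_pairs, ranges):
    '''Mimic the clause-appending in index(): base query plus key=value
    clauses and range clauses.'''
    q = base
    for key, value in select_pairs.items():
        if value:
            q += ',{}={}'.format(key, value)
    for var, (v1, v2) in ranges.items():
        if v1:
            q += ',{}>={}'.format(var, v1)
        if v2:
            q += ',{}<={}'.format(var, v2)
    return q.lstrip(',')

# build_query('', {'xc': 'PBE'}, {'gap': ('1.0', '2.0')})
# -> 'xc=PBE,gap>=1.0,gap<=2.0'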
Example #7
    def transfer(self,
                 filename_sqlite,
                 block_size=1000,
                 start_block=0,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 check=False):
        """ Transfer data from local sqlite3 .db file to the
        catalysis-hub postgreSQL server

        Parameters:
        filename_sqlite: str
            name of .db file
        block_size: int (default 1000)
            Number of atomic structures and reactions to write together
            in each block.
        start_block: int (default 0)
            Block to start with
        write_ase: bool
            whether or not to write atomic structures
        write_publication: bool
            whether or not to transfer publication table
        write_reaction: bool
            whether or not to transfer reaction table
        write_reaction_system: bool
            whether or not to write reaction_system table
        check: bool (default False)
            whether to check against existing reaction entries before
            writing; accepted but unused in this snippet
        """

        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')
        self.stdout.write('Connecting to {0}\n'.format(self.server_name))

        nrows = 0
        if write_ase:
            self.stdout.write('Transferring atomic structures\n')
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = n_structures // block_size + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size
                b1 = (block_id + 1) * block_size + 1

                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1

                rows = list(db.select('{}<id<{}'.format(b0, b1)))

                with ase.db.connect(self.server_name,
                                    type='postgresql') as db2:
                    # write one row at a time until ase is updated
                    # db2.write(rows)
                    for row in rows:
                        db2.write(row)

                nrows += len(rows)
                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finished Block {0} / {1} in {2} sec\n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} atomic structures\n'.format(
                        nrows))
                self.stdout.write('    Estimated time left: {0} sec\n'.format(
                    t_av * (n_blocks - block_id - 1)))

        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        Npub = 0
        Npubstruc = 0
        if write_publication:
            self.stdout.write('Transferring publications\n')
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            publication_system_values = []
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = list(row)
                value_list = get_value_list(values)
                publication_system_values += [tuple(value_list)]

            # Insert into publication_system table
            key_str = get_key_str(table='publication_system')
            insert_command = """INSERT INTO publication_system ({0})
            VALUES %s ON CONFLICT DO NOTHING;"""\
                .format(key_str)

            execute_values(cur=cur,
                           sql=insert_command,
                           argslist=publication_system_values,
                           page_size=1000)

            # Write pub_id to systems table
            cur.execute("""UPDATE systems SET
            key_value_pairs=jsonb_set(key_value_pairs, '{{"pub_id"}}', '"{pub_id}"')
            WHERE unique_id IN
            (SELECT ase_id from publication_system WHERE pub_id='{pub_id}')"""\
                        .format(pub_id=pub_id))

            con.commit()
            self.stdout.write('  Completed transfer of publications\n')

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            self.stdout.write('Transferring reactions\n')
            cur.execute('SELECT max(id) from reaction;')
            ID = cur.fetchone()[0] or 0

            n_react = db.get_last_id(cur_lite)

            n_blocks = int(n_react / block_size) + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                reaction_values = []
                reaction_system_values = []
                Ncat0 = Ncat
                Ncatstruc0 = Ncatstruc

                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                if block_id + 1 == n_blocks:
                    b1 = n_react + 1

                for id_lite in range(b0, b1):
                    row = db.read(id_lite)
                    if len(row) == 0:
                        continue
                    values = row[0]

                    # id = self.check(values[13], values[1], values[6], values[7],
                    #                values[8], strict=True)
                    id = None
                    update_rs = False
                    if id is not None:
                        id = self.update(id, values)
                        self.stdout.write(
                            'Updated reaction db with row id = {}\n'.format(
                                id))
                        update_rs = True
                    else:
                        ID += 1
                        Ncat += 1
                        value_list = get_value_list(values)
                        value_list[0] = ID  # set new ID
                        reaction_values += [tuple(value_list)]
                        if write_reaction_system:
                            cur_lite.execute(
                                "SELECT * from reaction_system where id={};".
                                format(id_lite))
                            rows = cur_lite.fetchall()
                            if update_rs:
                                cur.execute("""Delete from reaction_system
                                where id={0}""".format(id))
                            for row in rows:
                                Ncatstruc += 1
                                values = list(row)
                                if len(values) == 3:
                                    values.insert(1, None)
                                value_list = get_value_list(values)
                                value_list[3] = ID
                                reaction_system_values += [tuple(value_list)]

                q = ', '.join('?' * 14)
                q = '({})'.format(q.replace('?', '%s'))

                key_str = get_key_str()
                insert_command = """INSERT INTO reaction
                ({0}) VALUES %s;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_values,
                               template=q,
                               page_size=block_size)

                key_str = get_key_str('reaction_system')
                insert_command = """INSERT INTO reaction_system
                ({0}) VALUES %s ON CONFLICT DO NOTHING;""".format(key_str)

                execute_values(cur=cur,
                               sql=insert_command,
                               argslist=reaction_system_values,
                               page_size=1000)
                con.commit()

                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finished Block {0} / {1} in {2} sec\n'.format(
                        block_id + 1, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} reactions. \n'.format(
                        Ncat - Ncat0))
                self.stdout.write('    Estimated time left: {0} sec \n'.format(
                    t_av * (n_blocks - block_id - 1)))

            self.stdout.write('  Completed transfer of reactions\n')

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
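
The write_ase loop splits the source database into id blocks and selects each block with a '{}<id<{}' query. A small sketch of the boundary arithmetic (block_size and n_structures are illustrative):

block_size, n_structures = 1000, 2350
n_blocks = n_structures // block_size + 1
for block_id in range(n_blocks):
    b0 = block_id * block_size
    b1 = (block_id + 1) * block_size + 1
    if block_id + 1 == n_blocks:
        b1 = n_structures + 1          # last block ends at the final row
    print('block {}: {}<id<{}'.format(block_id, b0, b1))
# block 0: 0<id<1001, block 1: 1000<id<2001, block 2: 2000<id<2351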
Example #8

import random
import sys
from multiprocessing import Pool

import ase.db
from tqdm import tqdm

sys.path.insert(0, '../../../')
from src.discoverers.adsorption.values import calc_co2rr_activities
from src.discoverers.adsorption.mms import MultiscaleDiscoverer
from src.discoverers.adsorption.models import PrimeModel

# Discoverer settings
adsorbate = 'CO'
initial_training_size = 1000
batch_size = 200
quantile_cutoff = 0.9

# Data loading
db_dir = '../../pull_data/%s_synthesized/' % adsorbate
db = ase.db.connect(db_dir + '%s.db' % adsorbate)
rows = list(tqdm(db.select(), desc='reading ASE db', total=db.count()))
random.Random(42).shuffle(rows)


def parse_row(row):
    feature = row.id
    data = row.data
    label = data['adsorption_energy']
    surface = (data['mpid'], data['miller'], data['shift'], data['top'])
    return feature, label, surface


def parse_rows(rows):
    with Pool(processes=32, maxtasksperchild=1000) as pool:
        iterator = pool.imap(parse_row, rows, chunksize=100)
        iterator_tracked = tqdm(iterator, desc='parsing rows', total=len(rows))
        # The snippet is truncated here; presumably the tracked iterator was
        # consumed inside the pool context, e.g.:
        return list(iterator_tracked)
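
A hedged continuation of the pipeline (the unpacking mirrors the tuples returned by parse_row and assumes parse_rows returns the consumed iterator as completed above):

features, labels, surfaces = zip(*parse_rows(rows))
print('%d rows parsed' % len(features))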
Example #9
File: app.py Project: jboes/ase
def index():
    global next_con_id
    con_id = int(request.args.get('x', '0'))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ''
        columns = list(all_columns)
        sort = 'id'
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        query = request.args['query']  # Python 3: keep the str (Python 2 code called .encode() here)
        try:
            limit = max(1, min(int(request.args.get('limit', limit)), 200))
        except ValueError:
            pass
        sort = 'id'
        opened = set()
        page = 0
        nrows = None
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        tcolumns = request.args['toggle'].split(',')
        if tcolumns == ['reset']:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip('-'):
                        sort = 'id'
                        page = 0
                else:
                    columns.append(column)
        
    if nrows is None:
        nrows = db.count(query)
        
    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)
    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con
    table.format(SUBSCRIPT)
    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html', t=table, con=con, cid=con_id,
                           home=home, pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
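
Clicking a column header cycles the sort state: ascending, then descending ('-column'), then back to the default 'id'. A standalone sketch of that toggle, outside Flask:

def toggle_sort(current, column):
    # Mirrors the 'sort' branch in index().
    if column == current:
        return '-' + column   # second click: descending
    if '-' + column == current:
        return 'id'           # third click: reset
    return column             # first click: ascending

assert toggle_sort('id', 'energy') == 'energy'
assert toggle_sort('energy', 'energy') == '-energy'
assert toggle_sort('-energy', 'energy') == 'id'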
Example #10
def index():
    global next_con_id

    if not projects:
        # First time: initialize list of projects
        projects[:] = [(proj, d.metadata.get('title', proj))
                       for proj, d in sorted(databases.items())]

    con_id = int(request.args.get('x', '0'))
    if con_id in connections:
        project, query, nrows, page, columns, sort, limit = connections[con_id]
        newproject = request.args.get('project')
        if newproject is not None and newproject != project:
            con_id = 0

    if con_id not in connections:
        # Give this connection a new id:
        con_id = next_con_id
        next_con_id += 1
        project = request.args.get('project', projects[0][0])
        query = ['', {}, '']
        nrows = None
        page = 0
        columns = None
        sort = 'id'
        limit = 25

    db = databases[project]

    if not hasattr(db, 'meta'):
        meta = ase.db.web.process_metadata(db)
        db.meta = meta
    else:
        meta = db.meta

    if columns is None:
        columns = list(meta.get('default_columns') or all_columns)

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        dct = {}
        query = [request.args['query']]
        q = query[0]
        for special in meta['special_keys']:
            kind, key = special[:2]
            if kind == 'SELECT':
                value = request.args['select_' + key]
                dct[key] = value
                if value:
                    q += ',{}={}'.format(key, value)
            elif kind == 'BOOL':
                value = request.args['bool_' + key]
                dct[key] = value
                if value:
                    q += ',{}={}'.format(key, value)
            else:
                v1 = request.args['from_' + key]
                v2 = request.args['to_' + key]
                var = request.args['range_' + key]
                dct[key] = (v1, v2, var)
                if v1 or v2:
                    var = request.args['range_' + key]
                    if v1:
                        q += ',{}>={}'.format(var, v1)
                    if v2:
                        q += ',{}<={}'.format(var, v2)
        q = q.lstrip(',')
        query += [dct, q]
        sort = 'id'
        page = 0
        nrows = None
    elif 'limit' in request.args:
        limit = int(request.args['limit'])
        page = 0
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        column = request.args['toggle']
        if column == 'reset':
            columns = list(meta.get('default_columns') or all_columns)
        else:
            if column in columns:
                columns.remove(column)
                if column == sort.lstrip('-'):
                    sort = 'id'
                    page = 0
            else:
                columns.append(column)

    okquery = query

    if nrows is None:
        try:
            nrows = db.count(query[2])
        except (ValueError, KeyError) as e:
            flash(', '.join(['Bad query'] + list(e.args)))
            okquery = ('', {}, 'id=0')  # this will return no rows
            nrows = 0

    table = Table(db)
    table.select(okquery[2], columns, sort, limit, offset=page * limit)

    con = Connection(project, query, nrows, page, columns, sort, limit)
    connections[con_id] = con

    if len(connections) > 1000:
        # Forget old connections:
        for cid in sorted(connections)[:200]:
            del connections[cid]

    table.format(SUBSCRIPT)

    addcolumns = [column for column in all_columns + table.keys
                  if column not in table.columns]

    return render_template('table.html',
                           project=project,
                           projects=projects,
                           t=table,
                           md=meta,
                           con=con,
                           x=con_id,
                           home=home,
                           pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
Example #11
    def transfer(self,
                 filename_sqlite,
                 start_id=1,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 block_size=1000,
                 start_block=0):

        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')

        set_schema = 'SET search_path = {0};'.format(self.schema)
        cur.execute(set_schema)

        import os
        import time
        self.stdout.write('Imported os\n')

        import ase.db
        self.stdout.write('Imported ase.db\n')
        self.stdout.write('Building server_name\n')
        server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
            self.user, self.password, self.server)

        self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

        nrows = 0
        if write_ase:
            print('Transferring atomic structures')
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = int(n_structures / block_size) + 1
            t_av = 0
            for block_id in range(start_block, n_blocks):
                i = block_id - start_block
                t1 = time.time()
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                self.stdout.write(
                    str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                    str(b1) + '\n')
                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1

                rows = list(db.select('{}<id<{}'.format(b0 - 1, b1)))

                with ase.db.connect(server_name, type='postgresql') as db2:

                    db2.write(rows)

                nrows += len(rows)
                t2 = time.time()
                dt = t2 - t1
                t_av = (t_av * i + dt) / (i + 1)

                self.stdout.write(
                    '  Finished Block {0} / {1} in {2} sec\n'.format(
                        block_id, n_blocks, dt))
                self.stdout.write(
                    '    Completed transfer of {0} atomic structures.\n'.format(
                        nrows))
                self.stdout.write('    Estimated time left: {0} sec\n'.format(
                    t_av * (n_blocks - block_id)))

        from catkit.hub.cathubsqlite import CathubSQLite
        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        # write publication
        Npub = 0
        Npubstruc = 0
        if write_publication:
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = row[:]
                key_str, value_str = get_key_value_str(
                    values, table='publication_system')

                set_schema = 'SET search_path = {0};'.format(self.schema)
                cur.execute(set_schema)
                print("[SET SCHEMA] {set_schema}".format(**locals()))

                insert_command = """INSERT INTO publication_system ({0})
                VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                    .format(key_str, value_str)

                cur.execute(insert_command)
                # self.write(values, table='publication_system')
            con.commit()

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            n = db.get_last_id(cur_lite)
            select_ase = """SELECT * from reaction_system where id={};"""
            for id_lite in range(start_id, n + 1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]

                id = self.check(values[13],
                                values[1],
                                values[6],
                                values[7],
                                values[8],
                                strict=True)
                update_rs = False

                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(id))
                    update_rs = True
                else:
                    Ncat += 1
                    id = self.write(values)
                    self.stdout.write(
                        'Written to reaction db row id = {0}\n'.format(id))

                cur_lite.execute(select_ase.format(id_lite))
                rows = cur_lite.fetchall()
                if write_reaction_system:
                    if update_rs:
                        cur.execute("""Delete from reaction_system231
                        where reaction_id={0}""".format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        if len(values) == 3:
                            values.insert(1, None)

                        values[3] = id

                        key_str, value_str = \
                            get_key_value_str(values, table='reaction_system')

                        set_schema = 'SET search_path = {0};'.format(
                            self.schema)
                        cur.execute(set_schema)
                        print("[SET SCHEMA] {set_schema}".format(**locals()))

                        insert_command = """INSERT INTO reaction_system
                        ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;"""\
                            .format(key_str, value_str)

                        print("[INSERT COMMAND] {insert_command}".format(
                            **locals()))
                        cur.execute(insert_command)

                con.commit()  # Commit reaction_system for each row

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))
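
Both transfer variants lean on Postgres upsert semantics so that re-running a block cannot duplicate rows. A minimal psycopg2 sketch of that pattern (the connection string, table, and columns are illustrative):

import psycopg2

con = psycopg2.connect('postgres://user:password@server:5432/catalysishub')
cur = con.cursor()
cur.execute(
    'INSERT INTO publication_system (ase_id, pub_id) '
    'VALUES (%s, %s) ON CONFLICT DO NOTHING;',
    ('some-ase-id', 'some-pub-id'))
con.commit()
con.close()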
Example #12
def index():
    global next_con_id
    con_id = int(request.args.get('x', '0'))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ''
        columns = list(all_columns)
        sort = 'id'
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if 'sort' in request.args:
        column = request.args['sort']
        if column == sort:
            sort = '-' + column
        elif '-' + column == sort:
            sort = 'id'
        else:
            sort = column
        page = 0
    elif 'query' in request.args:
        query = request.args['query']  # Python 3: keep the str (Python 2 code called .encode() here)
        try:
            limit = max(1, min(int(request.args.get('limit', limit)), 200))
        except ValueError:
            pass
        sort = 'id'
        opened = set()
        page = 0
        nrows = None
    elif 'page' in request.args:
        page = int(request.args['page'])

    if 'toggle' in request.args:
        tcolumns = request.args['toggle'].split(',')
        if tcolumns == ['reset']:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip('-'):
                        sort = 'id'
                        page = 0
                else:
                    columns.append(column)

    if nrows is None:
        nrows = db.count(query)

    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)
    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con
    table.format(SUBSCRIPT)
    addcolumns = [
        column for column in all_columns + table.keys
        if column not in table.columns
    ]

    return render_template('table.html',
                           t=table,
                           con=con,
                           cid=con_id,
                           home=home,
                           pages=pages(page, nrows, limit),
                           nrows=nrows,
                           addcolumns=addcolumns,
                           row1=page * limit + 1,
                           row2=min((page + 1) * limit, nrows))
Example #13
def index():
    global next_con_id
    con_id = int(request.args.get("x", "0"))
    if con_id not in connections:
        con_id = next_con_id
        next_con_id += 1
        query = ""
        columns = list(all_columns)
        sort = "id"
        limit = 25
        opened = set()
        nrows = None
        page = 0
    else:
        query, nrows, page, columns, sort, limit, opened = connections[con_id]

    if "sort" in request.args:
        column = request.args["sort"]
        if column == sort:
            sort = "-" + column
        elif "-" + column == sort:
            sort = "id"
        else:
            sort = column
        page = 0
    elif "query" in request.args:
        query = request.args["query"].encode()
        try:
            limit = max(1, min(int(request.args.get("limit", limit)), 200))
        except ValueError:
            pass
        sort = "id"
        opened = set()
        page = 0
        nrows = None
    elif "page" in request.args:
        page = int(request.args["page"])

    if "toggle" in request.args:
        tcolumns = request.args["toggle"].split(",")
        if tcolumns == ["reset"]:
            columns = list(all_columns)
        else:
            for column in tcolumns:
                if column in columns:
                    columns.remove(column)
                    if column == sort.lstrip("-"):
                        sort = "id"
                        page = 0
                else:
                    columns.append(column)

    if nrows is None:
        nrows = db.count(query)

    table = Table(db)
    table.select(query, columns, sort, limit, offset=page * limit)
    con = Connection(query, nrows, page, columns, sort, limit, opened)
    connections[con_id] = con
    table.format(SUBSCRIPT)
    addcolumns = [column for column in all_columns + table.keys if column not in table.columns]

    return render_template(
        "table.html",
        t=table,
        con=con,
        cid=con_id,
        home=home,
        pages=pages(page, nrows, limit),
        nrows=nrows,
        addcolumns=addcolumns,
        row1=page * limit + 1,
        row2=min((page + 1) * limit, nrows),
    )
Example #14
    def transfer(self,
                 filename_sqlite,
                 start_id=1,
                 write_ase=True,
                 write_publication=True,
                 write_reaction=True,
                 write_reaction_system=True,
                 block_size=1000,
                 start_block=0):
        self.stdout.write('Starting transfer\n')
        con = self.connection or self._connect()
        self._initialize(con)
        self.stdout.write('Finished initialization\n')
        cur = con.cursor()
        self.stdout.write('Got a cursor\n')

        set_schema = 'SET search_path = {0};'.format(self.schema)
        cur.execute(set_schema)

        import os
        self.stdout.write('Imported os\n')
        import ase.db
        self.stdout.write('Imported ase.db\n')
        self.stdout.write('Building server_name\n')
        server_name = "postgres://{0}:{1}@{2}:5432/catalysishub".format(
            self.user, self.password, self.server)
        self.stdout.write('Connecting to {server_name}\n'.format(**locals()))

        nkvp = 0
        nrows = 0
        if write_ase:
            db = ase.db.connect(filename_sqlite)
            n_structures = db.count()
            n_blocks = int(n_structures / block_size) + 1
            for block_id in range(start_block, n_blocks):
                b0 = block_id * block_size + 1
                b1 = (block_id + 1) * block_size + 1
                self.stdout.write(
                    str(block_id) + ' ' + 'from ' + str(b0) + ' to ' +
                    str(b1) + '\n')
                if block_id + 1 == n_blocks:
                    b1 = n_structures + 1
                # rows = [db._get_row(i) for i in range(b0, b1)]
                # db2 = ase.db.connect(server_name, type='postgresql')
                # for lala in [0]:
                with ase.db.connect(server_name, type='postgresql') as db2:
                    for i in range(b0, b1):
                        self.stdout.write('  .' + str(i))
                        self.stdout.flush()
                        row = db.get(i)
                        kvp = row.get('key_value_pairs', {})
                        nkvp -= len(kvp)
                        # kvp.update(add_key_value_pairs)
                        nkvp += len(kvp)
                        db2.write(row, data=row.get('data'), **kvp)
                        nrows += 1
                    self.stdout.write('\n')

                self.stdout.write('Finished Block {0}:\n'.format(block_id))
                self.stdout.write(
                    '  Completed transfer of {0} atomic structures.\n'.format(
                        nrows))

        from cathub.cathubsqlite import CathubSQLite
        db = CathubSQLite(filename_sqlite)
        con_lite = db._connect()
        cur_lite = con_lite.cursor()

        # write publication
        Npub = 0
        Npubstruc = 0
        if write_publication:
            try:
                npub = db.get_last_pub_id(cur_lite)
            except BaseException:
                npub = 1
            for id_lite in range(1, npub + 1):
                Npub += 1
                row = db.read(id=id_lite, table='publication')
                if len(row) == 0:
                    continue
                values = row[0]
                pid, pub_id = self.write_publication(values)

            # Publication structures connection
            cur_lite.execute("""SELECT * from publication_system;""")
            rows = cur_lite.fetchall()
            for row in rows:
                Npubstruc += 1
                values = row[:]
                key_str, value_str = get_key_value_str(
                    values, table='publication_system')

                set_schema = 'SET search_path = {0};'.format(self.schema)
                cur.execute(set_schema)
                print("[SET SCHEMA] {set_schema}".format(**locals()))

                insert_command = 'INSERT INTO publication_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                    key_str, value_str)

                cur.execute(insert_command)
                # self.write(values, table='publication_system')
            con.commit()

        Ncat = 0
        Ncatstruc = 0

        if write_reaction:
            n = db.get_last_id(cur_lite)
            select_ase = """SELECT * from reaction_system where id={};"""
            for id_lite in range(start_id, n + 1):
                row = db.read(id_lite)
                if len(row) == 0:
                    continue
                values = row[0]

                id = self.check(values[13],
                                values[1],
                                values[6],
                                values[7],
                                values[8],
                                strict=True)
                update_rs = False

                if id is not None:
                    id = self.update(id, values)
                    self.stdout.write(
                        'Updated reaction db with row id = {}\n'.format(id))
                    update_rs = True
                else:
                    Ncat += 1
                    id = self.write(values)
                    self.stdout.write(
                        'Written to reaction db row id = {0}\n'.format(id))

                cur_lite.execute(select_ase.format(id_lite))
                rows = cur_lite.fetchall()
                if write_reaction_system:
                    if update_rs:
                        cur.execute(
                            'Delete from reaction_system where reaction_id={0}'
                            .format(id))
                    for row in rows:
                        Ncatstruc += 1
                        values = list(row)
                        if len(values) == 3:
                            values.insert(1, None)

                        values[3] = id

                        key_str, value_str = get_key_value_str(
                            values, table='reaction_system')

                        set_schema = 'SET search_path = {0};'.format(
                            self.schema)
                        cur.execute(set_schema)
                        print("[SET SCHEMA] {set_schema}".format(**locals()))

                        insert_command = 'INSERT INTO reaction_system ({0}) VALUES ({1}) ON CONFLICT DO NOTHING;'.format(
                            key_str, value_str)

                        print("[INSERT COMMAND] {insert_command}".format(
                            **locals()))
                        cur.execute(insert_command)

                con.commit()  # Commit reaction_system for each row

        for statement in tsvector_update:
            cur.execute(statement)

        if self.connection is None:
            con.commit()
            con.close()

        self.stdout.write('Inserted into:\n')
        self.stdout.write('  systems: {0}\n'.format(nrows))
        self.stdout.write('  publication: {0}\n'.format(Npub))
        self.stdout.write('  publication_system: {0}\n'.format(Npubstruc))
        self.stdout.write('  reaction: {0}\n'.format(Ncat))
        self.stdout.write('  reaction_system: {0}\n'.format(Ncatstruc))