def save_assigns_bulk_0(jobs, resource_set):
    if len(jobs) > 0:
        # Shape the assignments into flat tuples for the two gantt tables:
        # (moldable_id, start_time) and (moldable_id, resource_id).
        mld_id_start_time_s = []
        mld_id_rid_s = []
        for j in itervalues(jobs):
            mld_id_start_time_s.append((j.moldable_id, j.start_time))
            riods = itvs2ids(j.res_set)
            mld_id_rid_s.extend(
                [(j.moldable_id, resource_set.rid_o2i[rid]) for rid in riods])

        with db.engine.connect() as to_conn:
            cursor = to_conn.connection.cursor()
            pg_bulk_insert(cursor, db['gantt_jobs_predictions'], mld_id_start_time_s,
                           ('moldable_job_id', 'start_time'), binary=True)
            pg_bulk_insert(cursor, db['gantt_jobs_resources'], mld_id_rid_s,
                           ('moldable_job_id', 'resource_id'), binary=True)
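The shaping step above (repeated in save_assigns_bulk further down) turns each job into one (moldable_id, start_time) tuple plus one (moldable_id, resource_id) tuple per assigned resource. A minimal, database-free sketch of that step, using hypothetical SimpleNamespace stand-ins for the job and resource_set objects and inlining the interval expansion that itvs2ids performs:

    from types import SimpleNamespace

    # Hypothetical stand-ins: real jobs come from the scheduler, and res_set is
    # an interval set, written here as a list of closed (begin, end) intervals.
    jobs = {
        1: SimpleNamespace(moldable_id=10, start_time=1000, res_set=[(0, 2)]),
        2: SimpleNamespace(moldable_id=11, start_time=1200, res_set=[(3, 3)]),
    }
    resource_set = SimpleNamespace(rid_o2i={0: 100, 1: 101, 2: 102, 3: 103})

    mld_id_start_time_s = []
    mld_id_rid_s = []
    for j in jobs.values():
        mld_id_start_time_s.append((j.moldable_id, j.start_time))
        # itvs2ids expands intervals to individual ids; inlined for the sketch.
        riods = [i for (b, e) in j.res_set for i in range(b, e + 1)]
        mld_id_rid_s.extend((j.moldable_id, resource_set.rid_o2i[r]) for r in riods)

    print(mld_id_start_time_s)  # [(10, 1000), (11, 1200)]
    print(mld_id_rid_s)         # [(10, 100), (10, 101), (10, 102), (11, 103)]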
def test_pg_bulk_insert_csv():
    from oar.lib.psycopg2 import pg_bulk_insert
    cursor = db.session.bind.connection.cursor()
    columns = ("queue_name", "priority", "scheduler_policy", "state")
    rows = [
        ("old_test", -1, "LIFO", "Inactive"),
        ("default_test", 1, "FIFO", "Active"),
        ("vip_test", 10, "FIFO", "Active"),
    ]
    pg_bulk_insert(cursor, db['queues'], rows, columns, binary=False)

    names = [row[0] for row in rows]
    queues = db['Queue'].query\
        .filter(db['Queue'].name.in_(names))\
        .order_by(db['Queue'].priority)
    for queue, row in zip(queues, rows):
        assert queue.name == row[0]
        assert queue.priority == row[1]
        assert queue.scheduler_policy == row[2]
        assert queue.state == row[3]
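For orientation, here is a rough raw-psycopg2 equivalent of the CSV path exercised by the test, assuming a reachable PostgreSQL database and the queues table layout above; the DSN is a placeholder, and pg_bulk_insert presumably builds something along these lines internally:

    import csv
    import io

    import psycopg2

    conn = psycopg2.connect("dbname=oar_test")  # hypothetical DSN
    cur = conn.cursor()

    rows = [
        ("old_test", -1, "LIFO", "Inactive"),
        ("default_test", 1, "FIFO", "Active"),
    ]

    # Serialize the rows to in-memory CSV and stream them via COPY ... FROM STDIN.
    buf = io.StringIO()
    csv.writer(buf).writerows(rows)
    buf.seek(0)
    cur.copy_expert(
        "COPY queues (queue_name, priority, scheduler_policy, state) FROM STDIN WITH CSV",
        buf,
    )
    conn.commit()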
def save_assigns_bulk(jobs, resource_set):
    if len(jobs) > 0:
        logger.debug("nb jobs to save: " + str(len(jobs)))
        mld_id_start_time_s = []
        mld_id_rid_s = []
        for j in itervalues(jobs):
            logger.debug("job_id to save: " + str(j.id))
            mld_id_start_time_s.append((j.moldable_id, j.start_time))
            riods = itvs2ids(j.res_set)
            mld_id_rid_s.extend(
                [(j.moldable_id, resource_set.rid_o2i[rid]) for rid in riods])

        logger.info("save assignments")
        with db.engine.connect() as to_conn:
            cursor = to_conn.connection.cursor()
            pg_bulk_insert(cursor, db['gantt_jobs_predictions'], mld_id_start_time_s,
                           ('moldable_job_id', 'start_time'), binary=True)
            pg_bulk_insert(cursor, db['gantt_jobs_resources'], mld_id_rid_s,
                           ('moldable_job_id', 'resource_id'), binary=True)
def copy_table(ctx, table, from_conn, to_conn, pk=None):
    # COPY-based bulk loading is only used when requested and when the
    # destination dialect is psycopg2 (i.e. PostgreSQL).
    use_pg_copy = False
    if hasattr(ctx, 'pg_copy') and ctx.pg_copy:
        if hasattr(to_conn.dialect, 'psycopg2_version'):
            use_pg_copy = True

    insert_query = table.insert()
    select_query = select([table])
    count_query = select([func.count()]).select_from(table)

    if pk is not None:
        # Resume after the highest primary key already present in the
        # destination table.
        min_pk = to_conn.execute(select([func.max(pk)])).scalar()
        if min_pk is not None:
            count_query = count_query.where(pk > min_pk)
        select_query = select_query.order_by(
            *(order_by_func(pk) for pk in get_primary_keys(table))
        )

    total_length = from_conn.execute(count_query).scalar()
    if total_length == 0:
        return
    select_query = select_query.execution_options(stream_results=True)

    def log(progress):
        percentage = blue("%s/%s" % (progress, total_length))
        message = yellow('\r copy') + ' ~> table %s (%s)'
        ctx.log(message % (table.name, percentage), nl=False)

    def fetch_stream():
        def gen_pagination_with_pk(chunk):
            # Keyset pagination: walk the primary key space in fixed-size
            # windows instead of paging the whole table with OFFSET.
            max_pk_query = select([func.max(pk)])
            min_pk_query = select([func.min(pk)])
            max_pk = from_conn.execute(max_pk_query).scalar() or 0
            min_pk = from_conn.execute(min_pk_query).scalar() or 0
            # Start just after what the destination already holds.
            min_pk = to_conn.execute(max_pk_query).scalar() or (min_pk - 1)

            # Upper bound is inclusive so the last key still lands in a window.
            left_seq = range(min_pk + 1, max_pk + 1, chunk)
            right_seq = range(min_pk + chunk, max_pk + chunk, chunk)
            for min_id, max_id in zip(left_seq, right_seq):
                yield select_query.where(pk.between(min_id, max_id))

        if pk is not None:
            queries = gen_pagination_with_pk(ctx.chunk)
        else:
            queries = [select_query]

        progress = 0
        for query in queries:
            page = 0
            while True:
                q = query.offset(page * ctx.chunk).limit(ctx.chunk)
                rows = from_conn.execute(q)
                if rows.rowcount == 0:
                    break
                progress = min(progress + rows.rowcount, total_length)
                log(progress)
                yield (i for i in rows)
                page = page + 1
        ctx.log("")

    if not use_pg_copy:
        # Portable path: plain INSERTs, one batch per fetched chunk.
        for rows in fetch_stream():
            to_conn.execute(insert_query, list(rows))
    else:
        # PostgreSQL path: stream the rows through COPY.
        from oar.lib.psycopg2 import pg_bulk_insert
        columns = None
        for rows in fetch_stream():
            if columns is None:
                first = next(rows, None)
                columns = ["%s" % k for k in first.keys()]
                rows = chain((first,), rows)
            try:
                with to_conn.begin():
                    cursor = to_conn.connection.cursor()
                    pg_bulk_insert(cursor, table, rows, columns,
                                   binary=ctx.pg_copy_binary)
            except Exception:
                exc_type, exc_value, tb = sys.exc_info()
                reraise(exc_type, exc_value, tb.tb_next)
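The window arithmetic inside gen_pagination_with_pk is easier to see in isolation. A database-free sketch with the source and destination key bounds passed in as plain integers (the sample values are made up); each yielded pair becomes a pk BETWEEN min_id AND max_id clause:

    def pk_windows(src_min_pk, src_max_pk, dest_max_pk, chunk):
        # Resume just past what the destination already holds, or just below
        # the source minimum when the destination is empty.
        start = dest_max_pk if dest_max_pk is not None else src_min_pk - 1
        left_seq = range(start + 1, src_max_pk + 1, chunk)
        right_seq = range(start + chunk, src_max_pk + chunk, chunk)
        for min_id, max_id in zip(left_seq, right_seq):
            yield (min_id, max_id)

    print(list(pk_windows(1, 10, None, 4)))  # [(1, 4), (5, 8), (9, 12)]
    print(list(pk_windows(1, 10, 4, 4)))     # [(5, 8), (9, 12)]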