@contextmanager  # contextlib.contextmanager: needed so callers can use `with session_scope(...) as db_session:`
def session_scope(id_, versioned=True):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.
    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get("LOG_DB_SESSIONS"):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = "{}:{}".format(
                calling_frame.f_globals.get("__name__"), calling_frame.f_lineno
            )
            logger = log.bind(
                engine_id=id(engine), session_id=id(session), call_loc=call_loc
            )
            logger.info("creating db_session", sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except BaseException as exc:
        try:
            session.rollback()
            raise
        except OperationalError:
            log.warn(
                "Encountered OperationalError on rollback",
                original_exception=type(exc),
            )
            raise exc
    finally:
        if config.get("LOG_DB_SESSIONS"):
            lifetime = time.time() - start_time
            logger.info(
                "closing db_session",
                lifetime=lifetime,
                sessions_used=engine.pool.checkedout(),
            )
        session.close()
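# A minimal usage sketch for session_scope. `rename_account` and the `name`
# attribute are hypothetical, for illustration only; the point is that the
# context manager commits on clean exit and rolls back on error, so callers
# rarely need to call commit() themselves.
def rename_account(account_id, new_name):
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        account.name = new_name
        # No explicit commit: session_scope commits when the block exits.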
@pytest.fixture  # assumed: registered as a pytest yield-fixture (`config` is itself a fixture)
def empty_db(config):
    from inbox.ignition import engine_manager
    from inbox.models.session import new_session

    setup_test_db()
    engine = engine_manager.get_for_id(0)
    engine.session = new_session(engine)
    yield engine
    engine.session.close()
@contextmanager  # contextlib.contextmanager: needed so callers can use `with session_scope(...) as db_session:`
def session_scope(id_, versioned=True, explicit_begin=False):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.
    versioned : bool
        Do you want to enable the transaction log?
    explicit_begin : bool
        If True, issue an explicit BEGIN statement instead of relying on
        implicit transactional semantics.

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine), session_id=id(session),
                              call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except (gevent.GreenletExit, gevent.Timeout):
        # Killing a greenlet mid-query can leave the connection in an
        # undefined state, so discard it rather than return it to the pool.
        log.info('Invalidating connection on gevent exception', exc_info=True)
        session.invalidate()
    except BaseException as exc:
        try:
            session.rollback()
            raise
        except OperationalError:
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
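# Note: the explicit_begin flag above is documented but never consulted in the
# body as given. A plausible wiring, shown here as an assumption rather than
# the original implementation, would issue the BEGIN just before yielding:
#
#     if explicit_begin:
#         session.execute('begin')
#     yield session
#
# This pins down exactly when the transaction starts, instead of relying on
# the implicit BEGIN that SQLAlchemy emits on first statement execution.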
@pytest.fixture  # assumed: registered as a pytest yield-fixture (`dbloader` is itself a fixture)
def db(dbloader):
    from inbox.ignition import engine_manager
    from inbox.models.session import new_session

    engine = engine_manager.get_for_id(0)
    # TODO(emfree): tests should really either instantiate their own sessions,
    # or take a fixture that is itself a session.
    engine.session = new_session(engine)
    yield engine
    engine.session.close()
@contextmanager  # contextlib.contextmanager: needed so callers can use `with session_scope(...) as db_session:`
def session_scope(id_, versioned=True):
    """
    Provide a transactional scope around a series of operations.

    Takes care of rolling back failed transactions and closing the session
    when it goes out of scope.

    Note that sqlalchemy automatically starts a new database transaction when
    the session is created, and restarts a new transaction after every
    commit() on the session. Your database backend's transaction semantics
    are important here when reasoning about concurrency.

    Parameters
    ----------
    id_ : int
        Object primary key to grab a session for.
    versioned : bool
        Do you want to enable the transaction log?

    Yields
    ------
    Session
        The created session.

    """
    engine = engine_manager.get_for_id(id_)
    session = new_session(engine, versioned)

    try:
        if config.get('LOG_DB_SESSIONS'):
            start_time = time.time()
            calling_frame = sys._getframe().f_back.f_back
            call_loc = '{}:{}'.format(calling_frame.f_globals.get('__name__'),
                                      calling_frame.f_lineno)
            logger = log.bind(engine_id=id(engine), session_id=id(session),
                              call_loc=call_loc)
            logger.info('creating db_session',
                        sessions_used=engine.pool.checkedout())
        yield session
        session.commit()
    except BaseException as exc:
        try:
            session.rollback()
            raise
        except OperationalError:
            log.warn('Encountered OperationalError on rollback',
                     original_exception=type(exc))
            raise exc
    finally:
        if config.get('LOG_DB_SESSIONS'):
            lifetime = time.time() - start_time
            logger.info('closing db_session', lifetime=lifetime,
                        sessions_used=engine.pool.checkedout())
        session.close()
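# To exercise the LOG_DB_SESSIONS instrumentation above (assuming `config`
# is the dict-like object it is read from with .get()), flip the flag before
# opening sessions:
#
#     config['LOG_DB_SESSIONS'] = True
#     with session_scope(account_id) as db_session:
#         ...  # each session now logs "creating db_session" and
#              # "closing db_session" with its call site, lifetime, and the
#              # engine pool's checked-out connection count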
def delete_namespace(account_id, namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    """
    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    # Chunk delete for tables that might have a large concurrent write volume
    # to prevent those transactions from blocking.
    # NOTE: ImapFolderInfo does not fall into this category but we include it
    # here for simplicity.
    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if account.discriminator != 'easaccount':
            filters['imapuid'] = ('account_id', account_id)
            filters['imapfoldersyncstatus'] = ('account_id', account_id)
            filters['imapfolderinfo'] = ('account_id', account_id)
        else:
            filters['easuid'] = ('easaccount_id', account_id)
            filters['easfoldersyncstatus'] = ('account_id', account_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.
    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g. secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()
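# _batch_delete is referenced above but not shown. A minimal sketch of the
# chunked-deletion idea, assuming MySQL's DELETE ... LIMIT syntax and the
# module-level `log`, `gevent`, and `check_throttle` names used above; the
# helper body and chunk size here are illustrative, not the original code.
# (A later variant also passes account_id positionally; this sketch omits it.)
def _batch_delete(engine, table, column_id_pair, throttle=False,
                  dry_run=False, chunk_size=1000):
    column, id_ = column_id_pair
    query = 'DELETE FROM {} WHERE {}={} LIMIT {};'.format(
        table, column, id_, chunk_size)
    while True:
        if throttle and check_throttle():
            log.info('Throttling deletion')
            gevent.sleep(60)
        if dry_run:
            log.debug(query)
            break
        result = engine.execute(query)
        # Stop once a chunk deletes fewer rows than the limit: the table has
        # no rows left for this namespace/account.
        if result.rowcount < chunk_size:
            break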
def delete_namespace(account_id, namespace_id, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    """
    from inbox.models.session import session_scope
    from inbox.models import Account
    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    # Chunk delete for tables that might have a large concurrent write volume
    # to prevent those transactions from blocking.
    # NOTE: ImapFolderInfo does not fall into this category but we include it
    # here for simplicity.
    filters = OrderedDict()
    for table in [
        'message', 'block', 'thread', 'transaction', 'actionlog',
        'contact', 'event', 'dataprocessingcache'
    ]:
        filters[table] = ('namespace_id', namespace_id)

    with session_scope(namespace_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if account.discriminator != 'easaccount':
            filters['imapuid'] = ('account_id', account_id)
            filters['imapfoldersyncstatus'] = ('account_id', account_id)
            filters['imapfolderinfo'] = ('account_id', account_id)
        else:
            filters['easuid'] = ('easaccount_id', account_id)
            filters['easfoldersyncstatus'] = ('account_id', account_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.
    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        print 'Performing bulk deletion for table: {}'.format(table)
        start = time.time()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            print query.format(table, column, id_)

        end = time.time()
        print 'Completed bulk deletion for table: {}, time taken: {}'.\
            format(table, end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g. secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with a message if there are problems.

    """
    with session_scope(namespace_id) as db_session:
        try:
            account = (
                db_session.query(Account)
                .join(Namespace)
                .filter(Namespace.id == namespace_id)
                .one()
            )
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion."
            )
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in [
        "message",
        "block",
        "thread",
        "transaction",
        "actionlog",
        "event",
        "contact",
        "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], account_id,
                      throttle=throttle, dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.
    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g. secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats).
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with a message if there are problems.

    """
    with session_scope(namespace_id) as db_session:
        try:
            account = db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one()
        except NoResultFound:
            raise AccountDeletionErrror(
                'Could not find account in database')

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                'Account is_marked_for_deletion is False. '
                'Change this to proceed with deletion.')
        account_id = account.id
        account_discriminator = account.discriminator

    log.info('Deleting account', account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category but
    # we include it here for simplicity anyway.
    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    if account_discriminator == 'easaccount':
        filters['easuid'] = ('easaccount_id', account_id)
        filters['easfoldersyncstatus'] = ('account_id', account_id)
    else:
        filters['imapuid'] = ('account_id', account_id)
        filters['imapfoldersyncstatus'] = ('account_id', account_id)
        filters['imapfolderinfo'] = ('account_id', account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.
    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g. secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats).
    log.debug('Deleting liveness data', account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         time.time() - start_time)
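# A cautious invocation sketch for the delete_namespace variants above
# (the namespace_id value is illustrative). Note the precondition: the
# account must already have is_marked_for_deletion set, or the function
# raises AccountDeletionErrror. Preview the generated SQL with dry_run=True
# before deleting for real:
delete_namespace(namespace_id=42, throttle=True, dry_run=True)   # log-only preview
delete_namespace(namespace_id=42, throttle=True, dry_run=False)  # actually delete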