# Imports required by this migration (revision identifiers omitted here).
import sys
from gc import collect as garbage_collect  # stdlib gc.collect

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import mysql
from sqlalchemy.ext.declarative import declarative_base


def upgrade():
    op.add_column("transaction",
                  sa.Column("public_id", mysql.BINARY(16), nullable=True))
    op.add_column(
        "transaction",
        sa.Column("object_public_id", sa.String(length=191), nullable=True),
    )
    op.create_index("ix_transaction_public_id",
                    "transaction", ["public_id"],
                    unique=False)

    # TODO(emfree) reflect
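    # (App imports are kept local to this function so that Alembic can load
    # the migration file without importing the whole application.)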
    from inbox.ignition import main_engine
    from inbox.models.session import session_scope
    from inbox.sqlalchemy_ext.util import b36_to_bin, generate_public_id

    engine = main_engine(pool_size=1, max_overflow=0)
    # Reflect the live schema instead of importing ORM models, so the
    # migration doesn't depend on model code that may have drifted.
    Base = declarative_base()
    Base.metadata.reflect(engine)

    class Transaction(Base):
        __table__ = Base.metadata.tables["transaction"]

    with session_scope(versioned=False) as db_session:
        count = 0
        # Note: max(id) is an upper bound for the scan, not an exact count.
        (num_transactions,) = db_session.query(
            sa.func.max(Transaction.id)).one()
        if num_transactions is None:  # empty table
            num_transactions = 0
        print("Adding public ids to {} transactions".format(num_transactions))
        # Backfill in id ranges of 500, committing and garbage-collecting
        # every 500 updates to keep memory bounded.
        for pointer in range(0, num_transactions + 1, 500):
            for entry in db_session.query(Transaction).filter(
                    Transaction.id >= pointer, Transaction.id < pointer + 500):
                entry.public_id = b36_to_bin(generate_public_id())
                count += 1
                if not count % 500:
                    sys.stdout.write(".")
                    sys.stdout.flush()
                    db_session.commit()
                    garbage_collect()

    op.alter_column("transaction",
                    "public_id",
                    existing_type=mysql.BINARY(16),
                    nullable=False)

    op.add_column(
        "transaction",
        sa.Column("public_snapshot", sa.Text(length=4194304), nullable=True),
    )
    op.add_column(
        "transaction",
        sa.Column("private_snapshot", sa.Text(length=4194304), nullable=True),
    )
    op.drop_column("transaction", u"additional_data")
# Imports needed to run this script standalone. The consistency-check
# helpers (execute_hook, execute_hooks, cachefile_basename, ALL_ACCOUNTS,
# connect_sqlite3_db, init_sqlite3_db) come from the accompanying package
# modules; their exact import paths are assumed/omitted here.
from __future__ import print_function

import argparse
import errno
import os
import subprocess
import sys
from fnmatch import fnmatch

import pkg_resources

from inbox.models import Account, Namespace
from inbox.models.session import session_scope
from inbox.sqlalchemy_ext.util import b36_to_bin, int128_to_b36


def main():
    # Load plugins
    group = 'inbox.consistency_check_plugins'
    plugins = []    # see ListPlugin as an example
    for entry_point in pkg_resources.iter_entry_points(group):
        plugin_factory = entry_point.load()     # usually a python class
        plugin = plugin_factory()
        plugins.append((entry_point.name, plugin))

    # Create argument parser
    # NOTE: In the future, the interface may change to accept namespace
    # public_ids instead of account public_ids.
    parser = argparse.ArgumentParser(
        description="""
        Shows differences between metadata fetched from the specified
        account(s) and what's stored in the local Inbox database.
        """,
        epilog="""
        Only Gmail accounts are currently supported.
        """)
    parser.add_argument(
        "public_ids", nargs='*', metavar="PUBLIC_ID",
        # Round-tripping through b36_to_bin validates and normalizes the id.
        type=lambda x: int128_to_b36(b36_to_bin(x)), default=ALL_ACCOUNTS,
        help="account(s) to check (default: %(default)s)")
    parser.add_argument(
        '--cache-dir', default='./cache',
        help="cache directory (default: %(default)s)")
    parser.add_argument(
        '--no-overwrite', action='store_false', dest='force_overwrite',
        help="skip cache files already generated (default: overwrite them)")
    parser.add_argument(
        '--no-fetch', action='store_false', dest='do_slurp',
        help="don't fetch")
    parser.add_argument(
        '--no-dump', action='store_false', dest='do_dump',
        help="don't dump")
    parser.add_argument(
        '--no-diff', action='store_false', dest='do_diff',
        help="don't diff")
    execute_hooks(plugins, 'argparse_addoption')(parser)
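
    # Example invocation (the command name is an assumption; this script may
    # be wired up as a console entry point):
    #   consistency-check --cache-dir=/tmp/cc-cache --no-diff PUBLIC_ID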

    # Parse arguments
    args = parser.parse_args()
    execute_hooks(plugins, 'argparse_args')(args)

    # Make sure the cache directory exists.
    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)

    with session_scope() as db_session:
        # Query the list of accounts
        query = db_session.query(Account)
        if args.public_ids is not ALL_ACCOUNTS:
            query = query.filter(Account.public_id.in_(args.public_ids))
        accounts = query.all()

        # list.py uses this hook to show a list of accounts
        execute_hooks(plugins, 'process_accounts')(accounts)

        # `do_list` is not defined by the options above; the list.py plugin is
        # expected to add it via its argparse_addoption hook.
        if getattr(args, 'do_list', False):
            return

        # Query namespaces
        query = (
            db_session.query(Namespace, Account)
            .filter(Namespace.account_id == Account.id)
            .order_by(Namespace.id)
        )
        if args.public_ids is not ALL_ACCOUNTS:
            query = query.filter(Namespace.public_id.in_(args.public_ids))
        nnaa = query.all()

        # Sanity check: accounts reached via namespaces must exactly match the
        # accounts queried directly (symmetric difference must be empty).
        missing_accounts = (set(a.public_id for ns, a in nnaa) ^
                            set(a.public_id for a in accounts))
        if missing_accounts:
            raise AssertionError("Missing accounts: %r" % (missing_accounts,))

        # Fetch metadata for each account and save it into a sqlite3 database
        # in the cache_dir.
        # - See imap_gm.py & local_gm.py
        # - See sqlite3_db.py for sqlite3 database schema.
        # This creates files like:
        # - cache/<account.public_id>.<namespace.public_id>.imap_gm.sqlite3
        # - cache/<account.public_id>.<namespace.public_id>.local_gm.sqlite3
        if args.do_slurp:
            for namespace, account in nnaa:
                can_slurp = execute_hooks(plugins, 'can_slurp_namespace')(
                    namespace=namespace,
                    account=account)
                for i, (plugin_name, plugin) in enumerate(plugins):
                    if not can_slurp[i]:
                        continue

                    db_path = os.path.join(
                        args.cache_dir,
                        cachefile_basename(
                            namespace=namespace,
                            account=account,
                            plugin_name=plugin_name,
                            ext='.sqlite3'))

                    if os.path.exists(db_path):
                        if not args.force_overwrite:
                            # already saved
                            print(
                                "skipping {0}: already exists".format(db_path),
                                file=sys.stderr)
                            continue
                        os.unlink(db_path)

                    db = init_sqlite3_db(connect_sqlite3_db(db_path))

                    with db:
                        execute_hook(plugin, 'slurp_namespace')(
                            namespace=namespace,
                            account=account,
                            db=db)

        # Generate canonical-format text files from the sqlite3 databases.
        # - See dump_gm.py
        # This creates files like:
        # - cache/<account.public_id>.<namespace.public_id>.imap_gm.txt
        # - cache/<account.public_id>.<namespace.public_id>.local_gm.txt
        if args.do_dump:
            for namespace, account in nnaa:
                can_dump = execute_hooks(plugins, 'can_dump_namespace')(
                    namespace=namespace,
                    account=account)
                for i, (plugin_name, plugin) in enumerate(plugins):
                    if not can_dump[i]:
                        continue

                    db_path = os.path.join(args.cache_dir, cachefile_basename(
                        namespace=namespace,
                        account=account,
                        plugin_name=plugin_name,
                        ext='.sqlite3'))

                    txt_path = os.path.join(args.cache_dir, cachefile_basename(
                        namespace=namespace,
                        account=account,
                        plugin_name=plugin_name,
                        ext='.txt'))

                    try:
                        db_stat = os.stat(db_path)
                    except OSError as e:
                        if e.errno != errno.ENOENT:
                            raise
                        db_stat = None
                    try:
                        txt_stat = os.stat(txt_path)
                    except OSError as e:
                        if e.errno != errno.ENOENT:
                            raise
                        txt_stat = None

                    # Regenerate the dump unless it's newer than the database.
                    if (db_stat and txt_stat and
                            db_stat.st_mtime < txt_stat.st_mtime):
                        print(
                            "skipping {0}: up to date".format(txt_path),
                            file=sys.stderr)
                        continue

                    db = connect_sqlite3_db(db_path)

                    with db, open(txt_path, "w") as txtfile:
                        execute_hook(plugin, 'dump_namespace')(
                            db=db,
                            txtfile=txtfile)

        # Show differences between the text files in the cache directory.
        # Basically, this runs something like the following for each account:
        #   vimdiff cache/${acct_pubid}.${ns_pubid}.imap_gm.txt \
        #       cache/${acct_pubid}.${ns_pubid}.local_gm.txt
        if args.do_diff:
            if os.system("which vimdiff >/dev/null") == 0:
                diff_cmd = ['vimdiff']
            else:
                diff_cmd = ['diff', '-u']

            for namespace, account in nnaa:
                # Plugin hooks would be nice here, too.
                # This is such a hack.
                files_to_diff = sorted(
                    os.path.join(args.cache_dir, f)
                    for f in os.listdir(args.cache_dir)
                    if fnmatch(f, cachefile_basename(
                        namespace=namespace,
                        account=account,
                        plugin_name='*',
                        ext='.txt')))
                if files_to_diff:
                    status = subprocess.call(diff_cmd + files_to_diff)
                    # diff exits 0 (same) or 1 (different); anything else
                    # indicates an error running the command.
                    if status not in (0, 1):
                        raise AssertionError("error running diff")