Exemplo n.º 1
0
def watch(limit):
    """Watch scan rates across the cluster.

    Loops forever. Every ``period`` seconds a new snapshot is loaded with
    ``db.db()`` and compared with the previous one to compute, per bucket,
    the keys scanned per second over the interval (stored under
    ``gkrate``). Buckets with nothing scanned, or whose scanned count did
    not change since the previous snapshot, are left out of the table.
    A ``totals`` row is always displayed first, followed by the remaining
    buckets ordered by ``gkrate``, highest first.

    limit: maximum number of bucket rows to display; a falsy value shows
        every bucket that made progress.
    """
    period = 5.0
    prev = db.db()
    prev_totals = None

    while True:
        time.sleep(period)
        cur = db.db()
        # Rates for this interval are recorded on the new snapshot.
        # NOTE(review): presumably each bucket's ``data`` is this same
        # mapping and ``bucket.gkrate`` reads from it -- confirm in db.
        cur.data['gkrate'] = {}
        progress = []
        prev_buckets = {b.bucket_id: b for b in prev.buckets()}

        totals = {'scanned': 0, 'krate': 0, 'lrate': 0, 'bucket_id': 'totals'}

        for b in cur.buckets():
            if not b.scanned:
                continue

            # Totals cover every bucket with scanned keys, including those
            # that made no progress during this interval.
            totals['scanned'] += b.scanned
            totals['krate'] += b.krate
            totals['lrate'] += b.lrate

            if b.bucket_id not in prev_buckets:
                # Not in the previous snapshot: everything scanned so far
                # is attributed to this interval.
                rate = b.scanned / period
            elif b.scanned == prev_buckets[b.bucket_id].scanned:
                continue
            else:
                rate = (
                    b.scanned - prev_buckets[b.bucket_id].scanned) / period
            b.data['gkrate'][b.bucket_id] = rate
            progress.append(b)

        if prev_totals is None:
            # No earlier totals to diff against on the first pass.
            totals['gkrate'] = '...'
        else:
            totals['gkrate'] = (totals['scanned'] -
                                prev_totals['scanned']) / period
        prev = cur
        prev_totals = totals

        progress = sorted(progress, key=lambda x: x.gkrate, reverse=True)

        if limit:
            progress = progress[:limit]

        progress.insert(0, Bag(totals))

        # Clear only once the new table is ready to be drawn, so the previous
        # table stays on screen during the sleep instead of being wiped
        # immediately after it was printed.
        click.clear()
        format_plain(progress,
                     None,
                     explicit_only=True,
                     keys=['bucket_id', 'scanned', 'gkrate', 'lrate', 'krate'])
Exemplo n.º 2
0
def watch(limit):
    """Watch scan rates across the cluster.

    Runs until interrupted. After each ``period``-second pause a new
    snapshot is loaded via ``db.db()`` and diffed against the previous one
    to obtain each bucket's keys-scanned-per-second rate (``gkrate``) for
    the interval. Buckets with nothing scanned, or with an unchanged
    scanned count, are omitted. The table starts with a ``totals`` row,
    followed by buckets sorted by ``gkrate`` in descending order.

    limit: cap on the number of bucket rows shown; a falsy value means no
        cap.
    """
    period = 5.0
    prev = db.db()
    prev_totals = None

    while True:
        time.sleep(period)
        cur = db.db()
        # Holds the per-bucket rates computed below.
        # NOTE(review): presumably ``b.data`` is this same mapping and
        # ``b.gkrate`` looks the value up here -- confirm in db.
        cur.data['gkrate'] = {}
        progress = []
        prev_buckets = {b.bucket_id: b for b in prev.buckets()}

        totals = {'scanned': 0, 'krate': 0, 'lrate': 0, 'bucket_id': 'totals'}

        for b in cur.buckets():
            if not b.scanned:
                continue

            # Idle buckets still count toward the cluster-wide totals.
            totals['scanned'] += b.scanned
            totals['krate'] += b.krate
            totals['lrate'] += b.lrate

            previous = prev_buckets.get(b.bucket_id)
            if b.bucket_id not in prev_buckets:
                # New since the last snapshot: all scanned keys count as
                # progress for this interval.
                scanned_delta = b.scanned
            elif b.scanned == previous.scanned:
                continue
            else:
                scanned_delta = b.scanned - previous.scanned
            b.data['gkrate'][b.bucket_id] = scanned_delta / period
            progress.append(b)

        if prev_totals is None:
            # First pass: there is no earlier total to compare with.
            totals['gkrate'] = '...'
        else:
            totals['gkrate'] = (totals['scanned'] - prev_totals['scanned']) / period
        prev = cur
        prev_totals = totals

        progress = sorted(progress, key=lambda x: x.gkrate, reverse=True)

        if limit:
            progress = progress[:limit]

        progress.insert(0, utils.Bag(totals))

        # Wipe the screen right before drawing. Clearing before the sleep
        # would erase the table that was just printed and leave the
        # terminal blank for the whole interval.
        click.clear()
        format_plain(
            progress, None,
            explicit_only=True,
            keys=['bucket_id', 'scanned', 'gkrate', 'lrate', 'krate'])
Exemplo n.º 3
0
def inspect_bucket(bucket):
    """Echo everything the state db records about the bucket named ``bucket``.

    Output is a list of descriptive attributes, a blank line, then an
    "Errors" section with the bucket's error counters. If no bucket has
    that name a message is echoed and nothing else is printed.
    """
    # The scan does not stop at the first hit: if several buckets share the
    # name, the last one returned by the db is the one reported.
    matches = [b for b in db.db().buckets() if b.name == bucket]
    found = matches[-1] if matches else None
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    report = click.echo

    # (line template, attribute on the bucket object)
    summary_lines = (
        ("Bucket: %s", "name"),
        ("Account: %s", "account"),
        ("Region: %s", "region"),
        ("Created: %s", "created"),
        ("Size: %s", "size"),
        ("Inventory: %s", "inventory"),
        ("Partitions: %s", "partitions"),
        ("Scanned: %0.2f%%", "percent_scanned"),
    )
    for template, attr in summary_lines:
        report(template % getattr(found, attr))

    report("")
    report("Errors")

    for template, attr in (("Denied: %s", "keys_denied"),
                           ("BErrors: %s", "error_count")):
        report(template % getattr(found, attr))

    # (line template, key in the bucket's raw data); each entry is a mapping
    # indexed by bucket id, with 0 shown when the bucket has no entry.
    counter_lines = (
        ("KErrors: %s", 'keys-error'),
        ("Throttle: %s", 'keys-throttled'),
        ("Missing: %s", 'keys-missing'),
        ("Session: %s", 'keys-sesserr'),
        ("Connection: %s", 'keys-connerr'),
        ("Endpoint: %s", 'keys-enderr'),
    )
    for template, data_key in counter_lines:
        report(template % found.data[data_key].get(found.bucket_id, 0))
Exemplo n.º 4
0
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False):
    """Report on stats by bucket.

    Echoes one summary line for each bucket in the state db that passes
    every enabled filter.

    bucket: if truthy, only buckets whose name is ``in`` this value.
    account: passed through to the db's ``buckets()`` query.
    matched: if truthy, only buckets with a truthy ``matched``.
    kdenied: if truthy, only buckets with a truthy ``keys_denied``.
    errors: if truthy, only buckets with a truthy ``error_count``.
    dbpath: passed to ``db.db``.
    size: if truthy, only buckets whose ``size`` is at least this value.
    denied: if truthy, only buckets with a truthy ``denied``.
    """
    d = db.db(dbpath)

    def _repr(b):
        # One-line summary of a bucket's scan statistics.
        return (
            "account:%s name:%s percent:%0.2f matched:%d "
            "scanned:%d size:%d kdenied:%d errors:%d partitions:%d") % (
                b.account,
                b.name,
                b.percent_scanned,
                b.matched,
                b.scanned,
                b.size,
                b.keys_denied,
                b.error_count,
                b.partitions)

    for b in d.buckets(account):
        if bucket and b.name not in bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        # Filter on ``error_count``, the same attribute reported as
        # ``errors:`` in the summary line above.
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if denied and not b.denied:
            continue
        click.echo(_repr(b))
Exemplo n.º 5
0
def buckets(bucket=None,
            account=None,
            matched=False,
            kdenied=False,
            errors=False,
            dbpath=None,
            size=None,
            denied=False,
            format=None,
            output=None):
    """Report on stats by bucket.

    Collects the buckets from the state db that pass every enabled filter,
    ordered by ``bucket_id``, and hands them to a formatter.

    bucket: if truthy, only buckets whose name is ``in`` this value.
    account: passed through to the db's ``buckets()`` query.
    matched: if truthy, only buckets with a truthy ``matched``.
    kdenied: if truthy, only buckets with a truthy ``keys_denied``.
    errors: if truthy, only buckets with a truthy ``error_count``.
    dbpath: passed to ``db.db``.
    size: if truthy, only buckets whose ``size`` is at least this value.
    denied: if truthy, only buckets with a truthy ``denied``.
    format: ``'csv'`` selects ``format_csv``; anything else selects
        ``format_plain``.
    output: passed to the formatter as its second argument.
    """
    d = db.db(dbpath)

    # Named ``selected`` so the local does not shadow this function.
    selected = []
    for b in sorted(d.buckets(account), key=operator.attrgetter('bucket_id')):
        if bucket and b.name not in bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        # ``error_count`` is the error attribute buckets expose elsewhere
        # in this tool; filter on it rather than ``errors``.
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if denied and not b.denied:
            continue
        selected.append(b)

    formatter = format_csv if format == 'csv' else format_plain
    formatter(selected, output)
Exemplo n.º 6
0
def inspect_bucket(bucket):
    """Print all information the state db holds on one bucket.

    Looks the bucket up by name and echoes its descriptive attributes
    followed by an "Errors" section of error counters. If the name is not
    found a "no bucket named" message is echoed instead.
    """
    # Every bucket is examined; if several share the name, the last wins.
    found = None
    for candidate in db.db().buckets():
        if candidate.name == bucket:
            found = candidate
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    def emit_attrs(rows):
        # rows: (line template, attribute name) pairs, echoed in order.
        for template, attr in rows:
            click.echo(template % getattr(found, attr))

    emit_attrs((
        ("Bucket: %s", "name"),
        ("Account: %s", "account"),
        ("Region: %s", "region"),
        ("Created: %s", "created"),
        ("Size: %s", "size"),
        ("Inventory: %s", "inventory"),
        ("Partitions: %s", "partitions"),
        ("Scanned: %0.2f%%", "percent_scanned"),
    ))
    click.echo("")
    click.echo("Errors")

    emit_attrs((
        ("Denied: %s", "keys_denied"),
        ("BErrors: %s", "error_count"),
    ))

    # These counters are read from the raw data: one mapping per counter,
    # indexed by bucket id, defaulting to 0 when the bucket has no entry.
    for template, data_key in (
            ("KErrors: %s", 'keys-error'),
            ("Throttle: %s", 'keys-throttled'),
            ("Missing: %s", 'keys-missing'),
            ("Session: %s", 'keys-sesserr'),
            ("Connection: %s", 'keys-connerr'),
            ("Endpoint: %s", 'keys-enderr')):
        click.echo(template % found.data[data_key].get(found.bucket_id, 0))
Exemplo n.º 7
0
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False,
            format=None, output=None):
    """Report on stats by bucket.

    Gathers the buckets from the state db that pass every enabled filter,
    in ``bucket_id`` order, and passes them to a formatter.

    bucket: if truthy, only buckets whose name is ``in`` this value.
    account: passed through to the db's ``buckets()`` query.
    matched: if truthy, only buckets with a truthy ``matched``.
    kdenied: if truthy, only buckets with a truthy ``keys_denied``.
    errors: if truthy, only buckets with a truthy ``error_count``.
    dbpath: passed to ``db.db``.
    size: if truthy, only buckets whose ``size`` is at least this value.
    denied: if truthy, only buckets with a truthy ``denied``.
    format: ``'csv'`` selects ``format_csv``; anything else selects
        ``format_plain``.
    output: passed to the formatter as its second argument.
    """
    d = db.db(dbpath)

    # Named ``selected`` so the local does not shadow this function.
    selected = []
    for b in sorted(d.buckets(account),
                    key=operator.attrgetter('bucket_id')):
        if bucket and b.name not in bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        # ``error_count`` is the error attribute buckets expose elsewhere
        # in this tool; filter on it rather than ``errors``.
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if denied and not b.denied:
            continue
        selected.append(b)

    formatter = format_csv if format == 'csv' else format_plain
    formatter(selected, output)
Exemplo n.º 8
0
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False,
            format=None, incomplete=False, oversize=False, region=(),
            not_region=(), inventory=None, output=None, config=None, sort=None,
            tagprefix=None, not_bucket=None):
    """Report on stats by bucket.

    Collects the buckets from the state db that pass every enabled filter
    and hands them to a formatter. Buckets are gathered in ``bucket_id``
    order unless ``sort`` is given.

    bucket / not_bucket: include / exclude buckets whose name is ``in``
        the given value.
    account: passed through to the db's ``buckets()`` query.
    matched, kdenied, errors, inventory, denied: if truthy, keep only
        buckets with a truthy ``matched``, ``keys_denied``,
        ``error_count``, ``using_inventory`` or ``denied`` respectively.
    dbpath: passed to ``db.db``.
    size: if truthy, keep only buckets whose ``size`` is at least this.
    incomplete: if truthy, keep only buckets whose ``percent_scanned`` is
        below this value.
    oversize: if truthy, keep only buckets where ``scanned`` exceeds
        ``size``.
    region / not_region: include / exclude buckets by ``region``.
    format: ``'csv'`` selects ``format_csv``; anything else selects
        ``format_plain``.
    output: passed to the formatter as its second argument.
    config: path to a JSON file with an ``accounts`` list; each entry is
        read for its ``name`` and ``tags``. Required with ``tagprefix``.
    sort: name of a bucket attribute to order by, highest value first.
    tagprefix: for each account, the text following this prefix in a
        matching tag is attached to the account's buckets as an attribute
        named ``tagprefix[:-1]`` and passed to the formatter as an extra
        key.

    Raises ValueError if ``tagprefix`` is given without ``config``.
    """

    d = db.db(dbpath)

    if tagprefix and not config:
        raise ValueError(
            "account tag value inclusion requires account config file")

    # Maps account name -> tag value (the tag text after ``tagprefix``).
    account_data = {}
    if config and tagprefix:
        with open(config) as fh:
            data = json.load(fh).get('accounts')
        for a in data:
            for t in a['tags']:
                if t.startswith(tagprefix):
                    account_data[a['name']] = t[len(tagprefix):]

    # The attribute / column name is the prefix minus its last character.
    # NOTE(review): presumably that character is a separator such as ':'.
    tag_attr = tagprefix[:-1] if tagprefix else None

    selected = []
    for b in sorted(d.buckets(account),
                    key=operator.attrgetter('bucket_id')):
        if bucket and b.name not in bucket:
            continue
        if not_bucket and b.name in not_bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if inventory and not b.using_inventory:
            continue
        if denied and not b.denied:
            continue
        if oversize and b.scanned <= b.size:
            continue
        if incomplete and b.percent_scanned >= incomplete:
            continue
        if region and b.region not in region:
            continue
        if not_region and b.region in not_region:
            continue
        if tagprefix:
            # An account that is missing from the config, or has no tag with
            # the prefix, gets an empty value instead of aborting the whole
            # report with a KeyError.
            setattr(b, tag_attr, account_data.get(b.account, ''))
        selected.append(b)

    if sort:
        key = operator.attrgetter(sort)
        # Kept as reversed(sorted(...)) rather than sorted(reverse=True):
        # the two order equal keys differently, and existing output relies
        # on this form.
        selected = list(reversed(sorted(selected, key=key)))
    formatter = format_csv if format == 'csv' else format_plain
    keys = (tag_attr,) if tagprefix else ()
    formatter(selected, output, keys=keys)
Exemplo n.º 9
0
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False,
            format=None, incomplete=False, oversize=False, region=(),
            not_region=(), inventory=None, output=None, config=None, sort=None,
            tagprefix=None, not_bucket=None):
    """Report on stats by bucket.

    Collects the buckets from the state db that pass every enabled filter
    and hands them to a formatter. Buckets are gathered in ``bucket_id``
    order unless ``sort`` is given.

    bucket / not_bucket: include / exclude buckets whose name is ``in``
        the given value.
    account: passed through to the db's ``buckets()`` query.
    matched, kdenied, errors, inventory, denied: if truthy, keep only
        buckets with a truthy ``matched``, ``keys_denied``,
        ``error_count``, ``using_inventory`` or ``denied`` respectively.
    dbpath: passed to ``db.db``.
    size: if truthy, keep only buckets whose ``size`` is at least this.
    incomplete: if truthy, keep only buckets whose ``percent_scanned`` is
        below this value.
    oversize: if truthy, keep only buckets where ``scanned`` exceeds
        ``size``.
    region / not_region: include / exclude buckets by ``region``.
    format: ``'csv'`` selects ``format_csv``; anything else selects
        ``format_plain``.
    output: passed to the formatter as its second argument.
    config: path to a JSON file with an ``accounts`` list; each entry is
        read for its ``name`` and ``tags``. Required with ``tagprefix``.
    sort: name of a bucket attribute to order by, highest value first.
    tagprefix: for each account, the text following this prefix in a
        matching tag is attached to the account's buckets as an attribute
        named ``tagprefix[:-1]`` and passed to the formatter as an extra
        key.

    Raises ValueError if ``tagprefix`` is given without ``config``.
    """

    d = db.db(dbpath)

    if tagprefix and not config:
        raise ValueError(
            "account tag value inclusion requires account config file")

    # Maps account name -> tag value (the tag text after ``tagprefix``).
    account_data = {}
    if config and tagprefix:
        with open(config) as fh:
            data = json.load(fh).get('accounts')
        for a in data:
            for t in a['tags']:
                if t.startswith(tagprefix):
                    account_data[a['name']] = t[len(tagprefix):]

    # The attribute / column name is the prefix minus its last character.
    # NOTE(review): presumably that character is a separator such as ':'.
    tag_attr = tagprefix[:-1] if tagprefix else None

    selected = []
    for b in sorted(d.buckets(account),
                    key=operator.attrgetter('bucket_id')):
        if bucket and b.name not in bucket:
            continue
        if not_bucket and b.name in not_bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if inventory and not b.using_inventory:
            continue
        if denied and not b.denied:
            continue
        if oversize and b.scanned <= b.size:
            continue
        if incomplete and b.percent_scanned >= incomplete:
            continue
        if region and b.region not in region:
            continue
        if not_region and b.region in not_region:
            continue
        if tagprefix:
            # An account that is missing from the config, or has no tag with
            # the prefix, gets an empty value instead of aborting the whole
            # report with a KeyError.
            setattr(b, tag_attr, account_data.get(b.account, ''))
        selected.append(b)

    if sort:
        key = operator.attrgetter(sort)
        # Kept as reversed(sorted(...)) rather than sorted(reverse=True):
        # the two order equal keys differently, and existing output relies
        # on this form.
        selected = list(reversed(sorted(selected, key=key)))
    formatter = format_csv if format == 'csv' else format_plain
    keys = (tag_attr,) if tagprefix else ()
    formatter(selected, output, keys=keys)
Exemplo n.º 10
0
def accounts(dbpath,
             output,
             format,
             account,
             config=None,
             tag=None,
             tagprefix=None,
             region=(),
             not_region=(),
             not_bucket=None):
    """Report on stats by account.

    Loads the accounts from the state db, optionally narrows each account's
    bucket list by region or bucket name, optionally regroups the accounts
    by a tag value read from ``config``, and passes the result to the csv or
    plain accounts formatter along with ``output``.

    When both ``config`` and ``tagprefix`` are given, the reported rows are
    one ``db.Account`` per distinct tag value (the tag text following
    ``tagprefix``), each holding the buckets of every configured account
    carrying that tag. ``tag``, when not None, restricts this to configured
    accounts that have that exact tag. ``account`` is not used here.
    """
    state = db.db(dbpath)
    accounts = state.accounts()
    if format == 'csv':
        formatter = format_accounts_csv
    else:
        formatter = format_accounts_plain

    if region:
        for acct in accounts:
            acct.buckets = [
                bkt for bkt in acct.buckets if bkt.region in region]
        accounts = [acct for acct in accounts if acct.bucket_count]

    if not_region:
        for acct in accounts:
            acct.buckets = [
                bkt for bkt in acct.buckets if bkt.region not in not_region]
        accounts = [acct for acct in accounts if acct.bucket_count]

    if not_bucket:
        # Unlike the region filters, accounts left without buckets are kept.
        for acct in accounts:
            acct.buckets = [
                bkt for bkt in acct.buckets if bkt.name not in not_bucket]

    if config and tagprefix:
        by_name = {acct.name: acct for acct in accounts}

        with open(config) as fh:
            configured = json.load(fh).get('accounts')

        prefix_len = len(tagprefix)
        groups = {}
        for entry in configured:
            if tag is not None and tag not in entry['tags']:
                continue

            for t in entry['tags']:
                if not t.startswith(tagprefix):
                    continue
                value = t[prefix_len:]
                if not value:
                    continue
                # The group is registered before the account lookup, so a
                # tag value can appear with no buckets.
                if value not in groups:
                    groups[value] = db.Account(value, [])
                known = by_name.get(entry['name'])
                if not known:
                    print("missing %s" % entry['name'])
                    continue
                groups[value].buckets.extend(known.buckets)
        accounts = groups.values()

    formatter(accounts, output)
Exemplo n.º 11
0
def inspect_partitions(bucket):
    """Discover the partitions on a bucket via introspection.

    For large buckets which lack s3 inventories, salactus will attempt
    to process objects in parallel on the bucket by breaking the bucket
    into separate keyspace partitions. It does this with a heuristic
    that attempts to sample the keyspace and determine appropriate subparts.

    This command provides additional visibility into the partitioning of
    a bucket by showing how salactus would partition a given bucket: it
    echoes the number of partitions found, the number of keys seen while
    partitioning, and the partition prefixes, one per line.
    """

    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=logging.INFO)
    logging.getLogger('botocore').setLevel(level=logging.WARNING)

    # TODO: add a db.bucket accessor; for now take the first bucket whose
    # name matches.
    found = next((b for b in db.db().buckets() if b.name == bucket), None)
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    page_sizes = []
    prefixes = []

    # Replacement worker hooks: they only record what the partitioner hands
    # over.
    def process_keyset(bid, page):
        page_sizes.append(len(page))

    def process_bucket_iterator(bid, prefix, delimiter="", **continuation):
        prefixes.append(prefix)

    # Run invoked work synchronously, in this process.
    def invoke(f, *args, **kw):
        return f(*args, **kw)

    # Monkey-patch the worker module so partitioning runs locally with the
    # recording hooks above.
    worker.connection.hincrby = lambda x, y, z: True
    worker.invoke = invoke
    worker.process_keyset = process_keyset
    worker.process_bucket_iterator = process_bucket_iterator

    # Kick it off.
    worker.process_bucket_partitions(found.bucket_id)

    click.echo(
        "Found %d partitions %s keys scanned during partitioning" % (
            len(prefixes), sum(page_sizes)))
    click.echo("\n".join(prefixes))
Exemplo n.º 12
0
def inspect_partitions(bucket):
    """Discover the partitions on a bucket via introspection.

    For large buckets which lack s3 inventories, salactus will attempt
    to process objects in parallel on the bucket by breaking the bucket
    into separate keyspace partitions. It does this with a heuristic
    that attempts to sample the keyspace and determine appropriate subparts.

    This command provides additional visibility into the partitioning of
    a bucket by showing how salactus would partition a given bucket: it
    echoes the number of partitions found, the number of keys seen while
    partitioning, and the partition prefixes, one per line.
    """

    logging.basicConfig(
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s",
        level=logging.INFO)
    logging.getLogger('botocore').setLevel(level=logging.WARNING)

    # TODO: add a db.bucket accessor; for now take the first bucket whose
    # name matches.
    found = next((b for b in db.db().buckets() if b.name == bucket), None)
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    page_sizes = []
    prefixes = []

    # Replacement worker hooks: they only record what the partitioner hands
    # over.
    def process_keyset(bid, page):
        page_sizes.append(len(page))

    def process_bucket_iterator(bid, prefix, delimiter="", **continuation):
        prefixes.append(prefix)

    # Run invoked work synchronously, in this process.
    def invoke(f, *args, **kw):
        return f(*args, **kw)

    # Monkey-patch the worker module so partitioning runs locally with the
    # recording hooks above.
    worker.connection.hincrby = lambda x, y, z: True
    worker.invoke = invoke
    worker.process_keyset = process_keyset
    worker.process_bucket_iterator = process_bucket_iterator

    # Kick it off.
    worker.process_bucket_partitions(found.bucket_id)

    click.echo(
        "Found %d partitions %s keys scanned during partitioning" % (
            len(prefixes), sum(page_sizes)))
    click.echo("\n".join(prefixes))
Exemplo n.º 13
0
def accounts(dbpath, account):
    """Echo one line of scan statistics per account, ordered by name.

    ``dbpath`` is passed to ``db.db``. ``account`` is accepted but not used
    by this function.
    """
    template = "name:%s, matched:%d percent:%0.2f scanned:%d size:%d buckets:%d"
    state = db.db(dbpath)

    ordered = sorted(state.accounts(), key=lambda acct: acct.name)
    for acct in ordered:
        click.echo(template % (
            acct.name, acct.matched, acct.percent_scanned,
            acct.scanned, acct.size, len(acct.buckets)))
Exemplo n.º 14
0
def accounts(dbpath, account):
    """Print per-account scan statistics, one line per account, by name.

    The state is loaded from ``dbpath`` via ``db.db``. The ``account``
    argument is not used by this function.
    """
    line = "name:%s, matched:%d percent:%0.2f scanned:%d size:%d buckets:%d"

    def summarize(acct):
        # Values in the order the placeholders appear in ``line``.
        stats = (
            acct.name,
            acct.matched,
            acct.percent_scanned,
            acct.scanned,
            acct.size,
            len(acct.buckets),
        )
        return line % stats

    by_name = sorted(db.db(dbpath).accounts(), key=lambda acct: acct.name)
    for acct in by_name:
        click.echo(summarize(acct))
Exemplo n.º 15
0
def accounts(dbpath, output, format, account,
             config=None, tag=None, tagprefix=None, region=(),
             not_region=(), not_bucket=None):
    """Report on stats by account.

    Reads the accounts from the state db, trims each account's buckets by
    the region and bucket-name filters, and, when ``config`` and
    ``tagprefix`` are both given, regroups the buckets under one
    ``db.Account`` per tag value (the tag text after ``tagprefix``) found
    in the config file's ``accounts`` entries. ``tag``, when not None,
    limits the regrouping to configured accounts carrying that exact tag.
    The result goes to the csv or plain accounts formatter together with
    ``output``. ``account`` is not used here.
    """
    accounts = db.db(dbpath).accounts()
    formatter = (
        format_accounts_csv if format == 'csv' else format_accounts_plain)

    if region:
        for entry in accounts:
            entry.buckets = [
                item for item in entry.buckets if item.region in region]
        accounts = [entry for entry in accounts if entry.bucket_count]

    if not_region:
        for entry in accounts:
            entry.buckets = [
                item for item in entry.buckets
                if item.region not in not_region]
        accounts = [entry for entry in accounts if entry.bucket_count]

    if not_bucket:
        # No pruning of emptied accounts here, unlike the region filters.
        for entry in accounts:
            entry.buckets = [
                item for item in entry.buckets
                if item.name not in not_bucket]

    if config and tagprefix:
        scanned_accounts = {entry.name: entry for entry in accounts}

        with open(config) as fh:
            config_accounts = json.load(fh).get('accounts')

        grouped = {}
        for cfg in config_accounts:
            if tag is not None and tag not in cfg['tags']:
                continue

            for raw_tag in cfg['tags']:
                if not raw_tag.startswith(tagprefix):
                    continue
                group_name = raw_tag[len(tagprefix):]
                if not group_name:
                    continue
                # Groups are created before the account lookup below, so a
                # group may end up without any buckets.
                if group_name not in grouped:
                    grouped[group_name] = db.Account(group_name, [])
                scanned = scanned_accounts.get(cfg['name'])
                if not scanned:
                    print("missing %s" % cfg['name'])
                    continue
                grouped[group_name].buckets.extend(scanned.buckets)
        accounts = grouped.values()

    formatter(accounts, output)
Exemplo n.º 16
0
def save(dbpath):
    """Save the current state to a json file at ``dbpath``."""
    db.db().save(dbpath)
Exemplo n.º 17
0
def save(dbpath):
    """Load the current state and save it to the json file ``dbpath``."""
    current_state = db.db()
    current_state.save(dbpath)