Example #1
def image_composite(inputs, algo, expr, output, oformat, creation_options,
                    blue, green, red, nir, fswir, sswir, band,
                    mask_band, mask_val,
                    verbose, quiet):
    """ Create image composites based on some criteria

    Output image composites retain original values from input images that meet
    a given criterion. For example, in a maximum NDVI composite with 10 input
    images, all bands for a given pixel will contain the band values from the
    input raster that had the highest NDVI value.

    Users can choose from a set of predefined compositing algorithms or may
    specify a Snuggs S-expression that defines the compositing criteria.
    Normalized Difference indexes can be computed using "(normdiff a b)" for
    the Normalized Difference between "a" and "b" (or "nir" and "red").
    See https://github.com/mapbox/snuggs for more information on Snuggs
    expressions.

    The indexes for common optical bands (e.g., red, nir, blue) within the
    input rasters are included as optional arguments and are indexed in
    wavelength sequential order. You may need to overwrite the default indexes
    of bands used in a given S-expression with the correct band index.
    Additional bands may be identified and indexed using the
    '--band NAME=INDEX' option.

    Currently, input images must be "stacked", meaning that they contain the
    same bands and are the same shape and extent.

    Examples:

    1. Create a composite based on maximum NDVI

        Use the built-in maxNDVI algorithm:

        \b
        $ image_composite.py --algo maxNDVI image1.gtif image2.gtif image3.gtif
            composite_maxNDVI.gtif

        or with S-expression:

        \b
        $ image_composite.py --expr '(max (/ (- nir red) (+ nir red)))'
            image1.gtif image2.gtif image3.gtif composite_maxNDVI.gtif

        or with S-expressions using the normdiff shortcut:

        \b
        $ image_composite.py --expr '(max (normdiff nir red))'
            image1.gtif image2.gtif image3.gtif composite_maxNDVI.gtif

    2. Create a composite based on median EVI (not recommended)

        With S-expression:

        \b
        $ evi='(median (/ (- nir red) (+ (- (+ nir (* 6 red)) (* 7.5 blue)) 1)))'
        $ image_composite.py --expr "$evi"  image1.gtif image2.gtif image3.gtif
            composite_medianEVI.gtif

    3. Create a composite based on median NBR

        With S-expression:

        \b
        $ image_composite.py --expr '(median (normdiff nir sswir))'
            image1.gtif image2.gtif image3.gtif composite_medianNBR.gtif

    """
    if verbose:
        logger.setLevel(logging.DEBUG)
    elif quiet:
        logger.setLevel(logging.ERROR)

    # Prefer built-in algorithms to expressions if both are specified
    if not algo and not expr:
        raise click.UsageError('Error: must specify either --algo or --expr')
    elif algo is not None and expr is not None:
        logger.warning('Predefined algorithm and expression both defined. '
                       'Composite will be generated with predefined algorithm')
        expr = _ALGO[algo]
    elif algo is not None:
        logger.debug('Using predefined algorithm: {}'.format(algo))
        expr = _ALGO[algo]

    if not quiet:
        click.echo('Compositing criteria S-expression: "{}"'.format(expr))

    # Setup band keywords
    _bands = {'blue': blue, 'green': green, 'red': red,
              'nir': nir, 'fswir': fswir, 'sswir': sswir}
    # Parse any additional, user specified bands
    if band:
        for k, v in band.iteritems():
            try:
                _bands.update({k: int(v)})
            except ValueError:
                raise click.BadParameter(
                    'Value specified as KEY=VAL pair in --band must be an int')

    # Find only the band names and indexes required for the composite criteria
    crit_indices = {k: v - 1 for k, v in _bands.iteritems() if k in expr}

    # Enhance snuggs expressions to return index of value matching function
    snuggs.func_map['max'] = lambda a: np.argmax(a, axis=0)
    snuggs.func_map['min'] = lambda a: np.argmin(a, axis=0)
    snuggs.func_map['median'] = lambda a: np.argmin(
        np.abs(a - np.median(a, axis=0)), axis=0)
    snuggs.func_map['normdiff'] = lambda a, b: snuggs.eval(
        '(/ (- a b) (+ a b))', **{'a':a, 'b':b})

    with rasterio.drivers():

        # Read in the first image to fetch metadata
        with rasterio.open(inputs[0]) as first:
            meta = first.meta
            if 'transform' in meta:
                meta.pop('transform')  # remove transform since deprecated
            meta.update(driver=oformat)
            meta.update(**creation_options)
            if len(set(first.block_shapes)) != 1:
                click.echo('Cannot process input files - '
                           'All bands must have same block shapes')
                raise click.Abort()
            block_nrow, block_ncol = first.block_shapes[0]
            windows = first.block_windows(1)
            n_windows = math.ceil(meta['height'] / block_nrow *
                                  meta['width'] / block_ncol)

            # Ensure mask_band exists, if specified
            if mask_band:
                if mask_band <= meta['count'] and mask_band > 0:
                    mask_band -= 1
                else:
                    click.echo('Mask band does not exist in INPUT images')
                    raise click.Abort()

        # Initialize output data and create composite
        with rasterio.open(output, 'w', **meta) as dst:
            # Process by block
            dat = np.ma.empty((len(inputs), meta['count'],
                               block_nrow, block_ncol),
                              dtype=np.dtype(meta['dtype']))
            mi, mj = np.meshgrid(np.arange(block_nrow), np.arange(block_ncol),
                                 indexing='ij')
            # Open all source files one time
            srcs = [rasterio.open(fname) for fname in inputs]

            logger.debug('Processing blocks')
            if _has_progressbar and not quiet:
                widgets = [
                    progressbar.Percentage(),
                    progressbar.BouncingBar(
                        marker=progressbar.RotatingMarker())
                ]
                pbar = progressbar.ProgressBar(widgets=widgets).start()

            for i, (idx, window) in enumerate(windows):
                # Update dat and mi, mj only if window changes
                nrow = window[0][1] - window[0][0]
                ncol = window[1][1] - window[1][0]
                if dat.shape[-2] != nrow or dat.shape[-1] != ncol:
                    dat = np.ma.empty((len(inputs), meta['count'],
                                       nrow, ncol),
                                      dtype=np.dtype(meta['dtype']))
                    mi, mj = np.meshgrid(np.arange(nrow), np.arange(ncol),
                                         indexing='ij')
                for j, src in enumerate(srcs):
                    dat[j, ...] = src.read(masked=True, window=window)
                    # Mask values matching mask_vals if mask_band
                    if mask_band and mask_val:
                        dat[j, ...].mask = np.logical_or(
                            dat[j, ...].mask,
                            np.in1d(dat[j, mask_band, ...], mask_val,).reshape(
                                dat.shape[-2], dat.shape[-1])
                        )

                # Find indices of files for composite
                crit = {k: dat[:, v, ...] for k, v in crit_indices.iteritems()}
                crit_idx = snuggs.eval(expr, **crit)

                # Create output composite
                # Use np.rollaxis to get (nimage, nrow, ncol, nband) shape
                composite = np.rollaxis(dat, 1, 4)[crit_idx, mi, mj]

                # Write out
                for i_b in range(composite.shape[-1]):
                    dst.write(composite[:, :, i_b], indexes=i_b + 1,
                              window=window)
                if not quiet and _has_progressbar:
                    pbar.update(int((i + 1) / n_windows * 100))
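
# The heart of the block loop above is an "argmax, then gather" pattern: evaluate
# the criterion per input image, take the index of the winning image at each
# pixel, then pull every band for that pixel from the winning image. A minimal
# numpy-only sketch of that pattern (toy data, no snuggs or rasterio involved):
import numpy as np

nimage, nband, nrow, ncol = 3, 4, 2, 2
dat = np.random.rand(nimage, nband, nrow, ncol)

crit = dat[:, 0, ...]                 # pretend band 0 is the criterion (e.g. NDVI)
crit_idx = np.argmax(crit, axis=0)    # index of the winning image per pixel

mi, mj = np.meshgrid(np.arange(nrow), np.arange(ncol), indexing='ij')
composite = np.rollaxis(dat, 1, 4)[crit_idx, mi, mj]   # shape (nrow, ncol, nband)
assert composite.shape == (nrow, ncol, nband)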
Example #2
def calc(ctx, command, files, output, driver, name, dtype, masked, overwrite, mem_limit, creation_options):
    """A raster data calculator

    Evaluates an expression using input datasets and writes the result
    to a new dataset.

    Command syntax is lisp-like. An expression consists of an operator
    or function name and one or more strings, numbers, or expressions
    enclosed in parentheses. Functions include ``read`` (gets a raster
    array) and ``asarray`` (makes a 3-D array from 2-D arrays).

    \b
        * (read i) evaluates to the i-th input dataset (a 3-D array).
        * (read i j) evaluates to the j-th band of the i-th dataset (a
          2-D array).
        * (take foo j) evaluates to the j-th band of a dataset named foo
          (see help on the --name option above).
        * Standard numpy array operators (+, -, *, /) are available.
        * When the final result is a list of arrays, a multiple band
          output file is written.
        * When the final result is a single array, a single band output
          file is written.

    Examples:

    \b
         $ rio calc "(+ 2 (* 0.95 (read 1)))" tests/data/RGB.byte.tif \\
         > /tmp/out.tif

    The command above produces a 3-band GeoTIFF with all values scaled
    by 0.95 and incremented by 2.

    \b
        $ rio calc "(asarray (+ 125 (read 1)) (read 1) (read 1))" \\
        > tests/data/shade.tif /tmp/out.tif

    The command above produces a 3-band RGB GeoTIFF, with red levels
    incremented by 125, from the single-band input.

    The maximum amount of memory used to perform calculations defaults to
    64 MB. This number can be increased to improve the speed of calculation.

    """
    import numpy as np

    dst = None
    sources = []

    try:
        with ctx.obj["env"], ExitStack() as stack:
            output, files = resolve_inout(
                files=files, output=output, overwrite=overwrite
            )
            inputs = [tuple(n.split("=")) for n in name] + [(None, n) for n in files]
            sources = [
                stack.enter_context(rasterio.open(path)) for name, path in inputs
            ]

            first = sources[0]
            kwargs = first.profile
            kwargs.update(**creation_options)
            dtype = dtype or first.meta['dtype']
            kwargs['dtype'] = dtype
            kwargs.pop("driver", None)
            if driver:
                kwargs['driver'] = driver

            snuggs.func_map['read'] = _read_array
            snuggs.func_map['band'] = lambda d, i: _get_bands(inputs, sources, d, i)
            snuggs.func_map['bands'] = lambda d: _get_bands(inputs, sources, d)
            snuggs.func_map['fillnodata'] = lambda *args: fillnodata(*args)
            snuggs.func_map['sieve'] = lambda *args: sieve(*args)

            # The windows iterator is initialized with a single sample.
            # The actual work windows will be added in the second
            # iteration of the loop.
            work_windows = [(None, Window(0, 0, 16, 16))]

            for ij, window in work_windows:
                ctxkwds = OrderedDict()

                for i, ((name, path), src) in enumerate(zip(inputs, sources)):
                    ctxkwds[name or '_i%d' % (i + 1)] = src.read(masked=masked, window=window)

                res = snuggs.eval(command, **ctxkwds)
                results = res.astype(dtype)

                if isinstance(results, np.ma.core.MaskedArray):
                    results = results.filled(float(kwargs['nodata']))
                    if len(results.shape) == 2:
                        results = np.ma.asanyarray([results])
                elif len(results.shape) == 2:
                    results = np.asanyarray([results])

                # The first iteration is only to get sample results and from them
                # compute some properties of the output dataset.
                if dst is None:
                    kwargs['count'] = results.shape[0]
                    dst = rasterio.open(output, 'w', **kwargs)
                    work_windows.extend(
                        _chunk_output(
                            dst.width,
                            dst.height,
                            dst.count,
                            np.dtype(dst.dtypes[0]).itemsize,
                            mem_limit=mem_limit,
                        )
                    )

                # In subsequent iterations we write results.
                else:
                    dst.write(results, window=window)

    except snuggs.ExpressionError as err:
        click.echo("Expression Error:")
        click.echo("  {}".format(err.text))
        click.echo(" {}^".format(" " * err.offset))
        click.echo(err)
        raise click.Abort()

    finally:
        if dst:
            dst.close()
        for src in sources:
            src.close()
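
# _chunk_output() is referenced above but not shown. A plausible sketch, assuming
# all it must do is tile the output so that one tile of all bands stays under
# mem_limit megabytes (an illustration, not rasterio's actual helper):
import math

from rasterio.windows import Window


def chunk_output(width, height, count, itemsize, mem_limit=64):
    """Yield ((row_off, col_off), Window) work units bounded by mem_limit MB."""
    max_pixels = max(1, (mem_limit * 1024 * 1024) // (count * itemsize))
    tile = max(1, int(math.sqrt(max_pixels)))
    for row_off in range(0, height, tile):
        for col_off in range(0, width, tile):
            yield (row_off, col_off), Window(col_off, row_off,
                                             min(tile, width - col_off),
                                             min(tile, height - row_off))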
Example #3
def rm(project, services):
    """
    Shut down and delete all services and associated data.
    Useful if you'd like to start with a fresh slate.

    The default is to remove everything; however, you may pass positional
    arguments to specify an explicit list of services to remove.
    """
    import docker

    client = get_docker_client()

    from sentry.runner import configure

    configure()

    containers = _prepare_containers(project, silent=True)

    if services:
        selected_containers = {}
        for service in services:
            # XXX: This code is also fairly duplicated in here at this point, so dedupe in the future.
            if service not in containers:
                click.secho(
                    "Service `{}` is not known or not enabled.\n".format(service),
                    err=True,
                    fg="red",
                )
                click.secho(
                    "Services that are available:\n" + "\n".join(containers.keys()) + "\n",
                    err=True,
                )
                raise click.Abort()
            selected_containers[service] = containers[service]
        containers = selected_containers

    click.confirm(
        """
This will delete these services and all of their data:

%s

Are you sure you want to continue?"""
        % "\n".join(containers.keys()),
        abort=True,
    )

    for service_name, container_options in containers.items():
        try:
            container = client.containers.get(container_options["name"])
        except docker.errors.NotFound:
            click.secho(
                "> WARNING: non-existent container '%s'" % container_options["name"],
                err=True,
                fg="yellow",
            )
            continue

        click.secho("> Stopping '%s' container" % container_options["name"], err=True, fg="red")
        container.stop()
        click.secho("> Removing '%s' container" % container_options["name"], err=True, fg="red")
        container.remove()

    prefix = project + "_"

    for volume in client.volumes.list():
        if volume.name.startswith(prefix):
            if not services or volume.name[len(prefix) :] in services:
                click.secho("> Removing '%s' volume" % volume.name, err=True, fg="red")
                volume.remove()

    if not services:
        try:
            network = client.networks.get(project)
        except docker.errors.NotFound:
            pass
        else:
            click.secho("> Removing '%s' network" % network.name, err=True, fg="red")
            network.remove()
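
# get_docker_client() is not shown above. A minimal version could simply wrap
# docker.from_env() and fail early with a readable message when the daemon is
# unreachable (a sketch under that assumption, not Sentry's actual helper):
import click
import docker


def get_docker_client():
    client = docker.from_env()
    try:
        client.ping()  # verify the daemon is reachable before doing real work
    except Exception:
        click.secho("Docker is not running or is unreachable.", err=True, fg="red")
        raise click.Abort()
    return client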
Example #4
def cleanup(days, project, concurrency, silent, model, router, timed):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted.  The default for
    this is 30 days.  By default all projects are truncated, but the cleanup
    can be limited to a specific project with the `--project` flag, which
    accepts a project ID or a string of the form `org/project` where both
    parts are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    from threading import Thread
    from django.db import router as db_router
    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry import deletions
    from sentry import models

    if timed:
        import time
        from sentry.utils import metrics
        start_time = time.time()

    # list of models which this query is restricted to
    model_list = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not model_list:
            return False
        return model.__name__.lower() not in model_list

    # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations)
    # (model, datetime_field, order_by)
    BULK_QUERY_DELETES = (
        (models.GroupEmailThread, 'date', None),
        (models.GroupRuleStatus, 'date_added', None),
        (models.GroupTagValue, 'last_seen', None),
        (models.TagValue, 'last_seen', None),
        (models.EventTag, 'date_added', 'date_added'),
    )

    # Deletions that use the `deletions` code path (which handles their child relations)
    # (model, datetime_field, order_by)
    DELETES = (
        (models.Event, 'datetime', 'datetime'),
        (models.Group, 'last_seen', 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(models.LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        models.LostPasswordHash.objects.filter(date_added__lte=timezone.now() -
                                               timedelta(hours=48)).delete()

    for model in [models.ApiGrant, models.ApiToken]:
        if not silent:
            click.echo('Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection",
            err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")
        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation",
                err=True)

    for model, dtfield, order_by in BULK_QUERY_DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            ).execute()

    for model, dtfield, order_by in DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            query = {
                '{}__lte'.format(dtfield):
                (timezone.now() - timedelta(days=days)),
            }

            if project_id:
                if 'project' in model._meta.get_all_field_names():
                    query['project'] = project_id
                else:
                    query['project_id'] = project_id

            task = deletions.get(
                model=model,
                query=query,
                order_by=order_by,
                skip_models=[
                    models.Event,
                    models.EventMapping,
                    models.EventTag,
                    models.GroupEmailThread,
                    models.GroupRuleStatus,
                    models.GroupTagValue,
                ],
                transaction_id=uuid4().hex,
            )

            def _chunk_until_complete(num_shards=None, shard_id=None):
                has_more = True
                while has_more:
                    has_more = task.chunk(num_shards=num_shards,
                                          shard_id=shard_id)

            if concurrency > 1:
                threads = []
                for shard_id in range(concurrency):
                    t = Thread(target=(
                        lambda shard_id=shard_id: _chunk_until_complete(
                            num_shards=concurrency, shard_id=shard_id)))
                    t.start()
                    threads.append(t)

                for t in threads:
                    t.join()
            else:
                _chunk_until_complete()

    # EventMapping is fairly expensive and is special cased as it's likely you
    # won't need a reference to an event for nearly as long
    if not silent:
        click.echo("Removing expired values for EventMapping")
    if is_filtered(models.EventMapping):
        if not silent:
            click.echo('>> Skipping EventMapping')
    else:
        BulkDeleteQuery(model=models.EventMapping,
                        dtfield='date_added',
                        days=min(days, 7),
                        project_id=project_id,
                        order_by='-date_added').execute()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(models.FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    if timed:
        duration = int(time.time() - start_time)
        metrics.timing('cleanup.duration', duration, instance=router)
        click.echo("Clean up took %s second(s)." % duration)
Example #5
def cleanup(days, project, concurrency, max_procs, silent, model, router,
            timed):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted.  The default for
    this is 30 days.  By default all projects are truncated, but the cleanup
    can be limited to a specific project with the `--project` flag, which
    accepts a project ID or a string of the form `org/project` where both
    parts are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    import math
    import multiprocessing
    import pickle
    import subprocess
    import sys
    from django.db import router as db_router
    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry import models

    if timed:
        import time
        from sentry.utils import metrics
        start_time = time.time()

    # list of models which this query is restricted to
    model_list = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not model_list:
            return False
        return model.__name__.lower() not in model_list

    # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations)
    # (model, datetime_field, order_by)
    BULK_QUERY_DELETES = [
        (models.GroupEmailThread, 'date', None),
        (models.GroupRuleStatus, 'date_added', None),
    ] + EXTRA_BULK_QUERY_DELETES

    # Deletions that use the `deletions` code path (which handles their child relations)
    # (model, datetime_field, order_by)
    DELETES = (
        (models.Event, 'datetime', 'datetime'),
        (models.Group, 'last_seen', 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(models.LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        models.LostPasswordHash.objects.filter(date_added__lte=timezone.now() -
                                               timedelta(hours=48)).delete()

    for model in [models.ApiGrant, models.ApiToken]:
        if not silent:
            click.echo('Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection",
            err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")
        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation",
                err=True)

    for model, dtfield, order_by in BULK_QUERY_DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            ).execute()

    for model, dtfield, order_by in DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            if concurrency > 1:
                shard_ids = range(concurrency)
                num_procs = min(multiprocessing.cpu_count(), max_procs)
                threads_per_proc = int(
                    math.ceil(concurrency / float(num_procs)))

                pids = []
                for shard_id_chunk in chunker(shard_ids, threads_per_proc):
                    pid = subprocess.Popen([
                        sys.argv[0],
                        'cleanup_chunk',
                        '--days',
                        six.binary_type(days),
                    ] + (
                        ['--project_id',
                         six.binary_type(project_id)] if project_id else []
                    ) + [
                        '--model',
                        pickle.dumps(model),
                        '--dtfield',
                        dtfield,
                        '--order_by',
                        order_by,
                        '--num_shards',
                        six.binary_type(concurrency),
                        '--shard_ids',
                        ",".join([six.binary_type(s) for s in shard_id_chunk]),
                    ])
                    pids.append(pid)

                total_pid_count = len(pids)
                click.echo(
                    "%s concurrent processes forked, waiting on them to complete."
                    % total_pid_count)

                complete = 0
                for pid in pids:
                    pid.wait()
                    complete += 1
                    click.echo("%s/%s concurrent processes are finished." %
                               (complete, total_pid_count))

            else:
                task = create_deletion_task(days, project_id, model, dtfield,
                                            order_by)
                _chunk_until_complete(task)

    # EventMapping is fairly expensive and is special cased as it's likely you
    # won't need a reference to an event for nearly as long
    if not silent:
        click.echo("Removing expired values for EventMapping")
    if is_filtered(models.EventMapping):
        if not silent:
            click.echo('>> Skipping EventMapping')
    else:
        BulkDeleteQuery(model=models.EventMapping,
                        dtfield='date_added',
                        days=min(days, 7),
                        project_id=project_id,
                        order_by='-date_added').execute()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(models.FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    if timed:
        duration = int(time.time() - start_time)
        metrics.timing('cleanup.duration', duration, instance=router)
        click.echo("Clean up took %s second(s)." % duration)
Example #6
def build():
    """
    Build the configuration files
    """
    conf_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'conf'))
    instance_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', 'instance'))
    outdir = os.path.join(conf_dir, '..', '_build')

    options_file = os.path.join(instance_dir, 'site.yaml')
    if not os.path.exists(options_file):
        click.echo('ERROR: Could not find %s' % options_file)
        click.echo('...a sample is located in `conf`')
        click.echo(
            '...copy `conf/site.yaml` to your instance folder, and modify it as needed'
        )
        raise click.Abort()

    options = ruamel.yaml.safe_load(open(options_file).read())

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    env = Environment(loader=FileSystemLoader(conf_dir),
                      undefined=StrictUndefined)

    ###########################################################################
    click.echo('Creating `_build/invoicer-uwsgi.ini')
    template = env.get_template('invoicer-uwsgi.ini.j2')
    content = template.render(**options)
    with open(os.path.join(outdir, 'invoicer-uwsgi.ini'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
    ###########################################################################

    ###########################################################################
    click.echo('Creating `_build/invoicer-systemd.service')
    template = env.get_template('invoicer-systemd.service.j2')
    content = template.render(**options)
    with open(os.path.join(outdir, 'invoicer-systemd.service'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
    ###########################################################################

    ###########################################################################
    click.echo('Creating `_build/invoicer-upstream.nginx')
    template = env.get_template('invoicer-upstream.nginx.j2')
    content = template.render(**options)
    with open(os.path.join(outdir, 'invoicer-upstream.nginx'), 'w') as fh:
        fh.write(content)
    click.echo('...done')

    click.echo('Creating `_build/invoicer-location.nginx')
    template = env.get_template('invoicer-location.nginx.j2')
    content = template.render(**options)
    with open(os.path.join(outdir, 'invoicer-location.nginx'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
    ###########################################################################

    ###########################################################################
    click.echo('Creating `_build/fail2ban/filter.d/invoicer.local')
    f2b_filter_outdir = os.path.join(outdir, 'fail2ban', 'filter.d')
    if not os.path.isdir(f2b_filter_outdir):
        os.makedirs(f2b_filter_outdir)

    template = env.get_template('fail2ban/filter.d/invoicer.local.j2')
    content = template.render(**options)
    with open(os.path.join(f2b_filter_outdir, 'invoicer.local'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
    ###########################################################################

    ###########################################################################
    click.echo('Creating `_build/fail2ban/jail.d/invoicer.local')
    f2b_filter_outdir = os.path.join(outdir, 'fail2ban', 'jail.d')
    if not os.path.isdir(f2b_filter_outdir):
        os.makedirs(f2b_filter_outdir)

    template = env.get_template('fail2ban/jail.d/invoicer.local.j2')
    content = template.render(**options)
    with open(os.path.join(f2b_filter_outdir, 'invoicer.local'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
    ###########################################################################

    ###########################################################################
    click.echo('Creating `_build/deploy.bash')
    template = env.get_template('deploy.bash.j2')
    content = template.render(**options)
    with open(os.path.join(outdir, 'deploy.bash'), 'w') as fh:
        fh.write(content)
    click.echo('...done')
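
# Every block above repeats the same render-then-write sequence. A small helper
# along these lines (not part of the original module, just a sketch) would cut
# the repetition without changing behaviour:
import os


def render_to(env, options, template_name, dest_path):
    """Render a Jinja2 template with `options` and write the result to dest_path."""
    dest_dir = os.path.dirname(dest_path)
    if dest_dir and not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    content = env.get_template(template_name).render(**options)
    with open(dest_path, 'w') as fh:
        fh.write(content)

# Usage, e.g.: render_to(env, options, 'deploy.bash.j2', os.path.join(outdir, 'deploy.bash'))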
Example #7
def save(
    target: Optional[str],
    name: Optional[str],
    config: Optional[str],
    seed: Optional[str],
    destination: Optional[str],
    multiprocessing: bool,
) -> None:
    """Save the sketch to a SVG file.

    TARGET may either point at a Python file or at a directory and is interpreted in the same
    way as the `vsk run` command (see `vsk run --help`).

    By default, the output is named after the sketch and the provided options. An alternative
    name may be provided with the --name option.

    If the sketch has parameters, their default values are used. Alternatively, a pre-existing
    configuration can be used instead with the --config option.

    By default, a random seed is used for vsketch's random number generator. If --config is
    used, the seed saved in the configuration is used instead. A seed may also be provided with
    the --seed option, in which case it will override the configuration's seed.

    The --seed option also accepts a seed range in the form FIRST..LAST, e.g. 0..100. In this
    case, one output file per seed is generated.

    If the number of files to generate is 4 or more, all available cores are used for the
    process. This behaviour can be disabled with --no-multiprocessing or the
    VSK_MULTIPROCESSING variable.

    By default, all SVGs are saved in the sketch's "output" sub-directory. This can be
    overridden using the --destination option.
    """

    try:
        path = _find_sketch_script(target)
    except ValueError as err:
        print_error("Sketch could not be found: ", str(err))
        raise click.Abort()

    # load configuration
    param_set: Dict[str, vsketch.ParamType] = {}
    config_postfix = ""
    if config is not None:
        config_path = pathlib.Path(config)
        if not config_path.exists():
            config_path = get_config_path(path) / (config + ".json")

        if config_path.exists():
            param_set = load_config(config_path)
            config_postfix = "_" + config_path.stem
        else:
            print_error("Config file not found: ", str(config_path))

    # compute name
    if name is None:
        name = canonical_name(path) + config_postfix
    seed_in_name = seed is not None

    if seed is None:
        if param_set is not None and "__seed__" in param_set:
            seed_start = seed_end = int(param_set["__seed__"])
        else:
            seed_start = seed_end = random.randint(0, 2**31 - 1)
    else:
        try:
            seed_start, seed_end = _parse_seed(seed)
        except ValueError as err:
            print_error(f"Could not parse seed {seed}: ", str(err))
            raise click.Abort()

    # prepare output path
    if destination is not None:
        output_path = pathlib.Path(destination)
        if not output_path.exists():
            print_error("Provided output path does not exist: ",
                        str(output_path.absolute()))
            raise click.Abort()
        if not output_path.is_dir():
            print_error("Provided output path is not a directory: ",
                        str(output_path.absolute()))
            raise click.Abort()
    else:
        output_path = path.parent / "output"
        if not output_path.exists():
            output_path.mkdir()
        elif not output_path.is_dir():
            print_error("Could not create output directory: ",
                        str(output_path))
            raise click.Abort()

    # noinspection PyShadowingNames
    def _write_output(seed: int) -> None:
        # this needs to be there because the sketch class cannot be pickled apparently
        sketch_class = load_sketch_class(path)
        if sketch_class is None:
            print_error("Could not load script: ", str(path))
            raise click.Abort()

        sketch_class.set_param_set(param_set)

        output_name = name
        if seed_in_name:
            output_name += "_s" + str(seed)  # type: ignore
        output_name += ".svg"  # type: ignore

        output_file = output_path / output_name

        sketch = sketch_class.execute(finalize=True, seed=seed)

        if sketch is None:
            print_error("Could not execute script: ", str(path))
            raise click.Abort()

        doc = sketch.vsk.document
        with open(output_file, "w") as fp:
            print_info("Exporting SVG: ", str(output_file))
            vp.write_svg(fp,
                         doc,
                         source_string=f"vsketch save -s {seed} {path}",
                         color_mode="layer")

    seed_range = range(seed_start, seed_end + 1)

    if len(seed_range) < 4 or not multiprocessing:
        for s in seed_range:
            _write_output(s)
    else:
        with Pool() as p:
            list(p.imap(_write_output, seed_range))
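
# _parse_seed() is referenced above but not shown. Given the documented
# FIRST..LAST syntax, a minimal implementation could look like this (a sketch;
# vsketch's real parser may validate more):
from typing import Tuple


def parse_seed(seed: str) -> Tuple[int, int]:
    """Parse 'N' or 'FIRST..LAST' into an inclusive (start, end) pair."""
    if ".." in seed:
        first, last = seed.split("..", 1)
        start, end = int(first), int(last)
    else:
        start = end = int(seed)
    if start > end:
        raise ValueError(f"invalid seed range: {seed!r}")
    return start, end


assert parse_seed("0..100") == (0, 100)
assert parse_seed("42") == (42, 42)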
Example #8
def scale(name, n_workers):
    """Scale a Dask cluster.

    NAME is the name of the cluster to scale.
    Run `daskctl list` for all available options.

    N_WORKERS is the number of workers to scale to.

    """

    try:
        with Progress(
                "[progress.description]{task.description}",
                BarColumn(),
                "[progress.percentage]{task.fields[workers]}/{task.fields[n_workers]}",
                transient=True,
        ) as progress:
            scale_task = progress.add_task("[blue]Preparing to scale...",
                                           start=False,
                                           workers="..",
                                           n_workers="..")
            cluster = get_cluster(name)
            start_workers = len(cluster.scheduler_info["workers"])
            diff_workers = n_workers - start_workers

            if diff_workers != 0:
                progress.update(
                    scale_task,
                    workers=start_workers,
                    n_workers=n_workers,
                    total=abs(diff_workers),
                )
                if diff_workers > 0:
                    progress.update(scale_task,
                                    description="[green]Adding workers...")
                elif diff_workers < 0:
                    progress.update(scale_task,
                                    description="[red]Removing workers...")
                progress.start_task(scale_task)

                cluster.scale(n_workers)

                while len(cluster.scheduler_info["workers"]) != n_workers:
                    sleep(0.1)
                    progress.update(
                        scale_task,
                        completed=abs(
                            len(cluster.scheduler_info["workers"]) -
                            start_workers),
                        workers=len(cluster.scheduler_info["workers"]),
                    )

                progress.update(scale_task, completed=diff_workers)
                progress.console.print(
                    f"Scaled cluster [blue]{name}[/blue] to {n_workers} workers."
                )
            else:
                progress.console.print(
                    f"Cluster [blue]{name}[/blue] already at {n_workers}, nothing to do."
                )

    except Exception as e:
        console.print(e)
        raise click.Abort()
Example #9
    async def _list():
        table = Table(box=box.SIMPLE)
        table.add_column("Name", style="cyan", no_wrap=True)
        table.add_column("Address")
        table.add_column("Type")
        table.add_column("Discovery")
        table.add_column("Workers")
        table.add_column("Threads")
        table.add_column("Memory")
        table.add_column("Created")
        table.add_column("Status")

        with console.status("[bold green]Discovering clusters...") as status:
            discovery_methods = list_discovery_methods()
            for discovery_method in discovery_methods:
                status.update(
                    f"[bold green]Discovering {discovery_method}s...")
                if discovery_methods[discovery_method]["enabled"] and (
                        discovery is None or discovery == discovery_method):
                    try:
                        async for cluster in discover_clusters(
                                discovery=discovery_method):
                            try:
                                workers = cluster.scheduler_info[
                                    "workers"].values()
                            except KeyError:
                                workers = []
                            try:
                                created = format_time_ago(
                                    datetime.datetime.fromtimestamp(
                                        float(cluster.scheduler_info["started"]
                                              )))
                            except KeyError:
                                created = "Unknown"

                            cluster_status = cluster.status.name.title()
                            if cluster.status == Status.created:
                                cluster_status = f"[yellow]{cluster_status}[/yellow]"
                            elif cluster.status == Status.running:
                                cluster_status = f"[green]{cluster_status}[/green]"
                            else:
                                cluster_status = f"[red]{cluster_status}[/red]"

                            table.add_row(
                                cluster.name,
                                cluster.scheduler_address,
                                typename(type(cluster)),
                                discovery_method,
                                str(len(workers)),
                                str(sum(w["nthreads"] for w in workers)),
                                format_bytes(
                                    sum([w["memory_limit"] for w in workers])),
                                created,
                                cluster_status,
                            )
                    except Exception:
                        if discovery is None:
                            console.print(
                                f":warning: Discovery {discovery_method} failed. "
                                f"Run `daskctl cluster list {discovery_method}` for more info.",
                                style="yellow",
                            )
                        else:
                            console.print_exception(show_locals=True)
                            raise click.Abort()

        console.print(table)
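
# format_time_ago() is used above but not defined here. A rough sketch of what
# it presumably does (turn a datetime into "N <unit>(s) ago"); the real dask-ctl
# helper may differ:
import datetime


def format_time_ago(when: datetime.datetime) -> str:
    seconds = int((datetime.datetime.now() - when).total_seconds())
    for unit, length in (("day", 86400), ("hour", 3600), ("minute", 60)):
        if seconds >= length:
            n = seconds // length
            return "%d %s%s ago" % (n, unit, "" if n == 1 else "s")
    return "Just now"


print(format_time_ago(datetime.datetime.now() - datetime.timedelta(hours=3)))  # "3 hours ago"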
Example #10
def error_handler(e):
    sys.stderr.write('ERROR: %s\n' % e)
    raise click.Abort()
Example #11
def site_command(context, destination, exporter, no_stream, pack, safe,
                 sitemap_only, split, template_dir, user_agent, xss, path):
    """
    Validate pages from given sitemap.

    Sitemap path can be a URL starting with 'http://' or 'https://' or a
    file path.

    There are multiple exporter formats.

    Note that an invalid sitemap path still raises an error even when the
    '--safe' option is enabled.
    """
    logger = logging.getLogger("py-html-checker")

    logger.debug("Opening sitemap: {}".format(path))

    # Safe mode enabled, catch all internal exceptions
    if safe:
        CatchedException = HtmlCheckerBaseException
    # Safe mode disabled, watch for a dummy exception that will never occur,
    # so internal exceptions are still raised
    else:
        CatchedException = HtmlCheckerUnexpectedException

    # Initial tools options
    sitemap_options = {}
    interpreter_options = OrderedDict([])
    tool_options = OrderedDict([])
    exporter_options = {}

    if no_stream:
        tool_options["--no-stream"] = None

    if template_dir:
        exporter_options["template_dir"] = template_dir

    if user_agent:
        sitemap_options["user_agent"] = user_agent
        tool_options["--user-agent"] = user_agent

    if xss:
        key = "-Xss{}".format(xss)
        interpreter_options[key] = None

    # Validate sitemap path
    sitemap_file_status = validate_sitemap_path(logger, path)
    if not sitemap_file_status:
        raise click.Abort()

    # Open sitemap to get paths
    try:
        parser = Sitemap(**sitemap_options)
        paths = parser.get_urls(path)
    except CatchedException as e:
        logger.critical(e)
        raise click.Abort()

    # Ensure each path is only checked once
    reduced_paths = reduce_unique(paths)

    if len(paths) > len(reduced_paths):
        msg = "Sitemap have {reduced} paths (plus {tweens} ignored duplications)"
        logger.info(
            msg.format(
                **{
                    "reduced": len(reduced_paths),
                    "tweens": len(paths) - len(reduced_paths),
                }))
    else:
        logger.info("Sitemap have {} paths".format(len(reduced_paths)))

    # Proceed to path validations
    if not sitemap_only:
        logger.debug("Launching validation for sitemap items")

        # Start validator interface
        v = ValidatorInterface(exception_class=CatchedException)

        # Start exporter instance
        exporter = get_exporter(exporter)(**exporter_options)
        exporter_error = exporter.validate()
        if exporter_error:
            logger.critical(exporter_error)
            raise click.Abort()
        else:
            if hasattr(exporter, "template_dir"):
                msg = "Using template directory: {}"
                logger.debug(msg.format(exporter.template_dir))

        # Keep paths packed or split them, depending on the 'split' option
        routines = [reduced_paths[:]]
        if split:
            routines = [[v] for v in reduced_paths]

        # Get report from validator process to build export
        for item in routines:
            try:
                report = v.validate(item,
                                    interpreter_options=interpreter_options,
                                    tool_options=tool_options)
                exporter.build(report.registry)
            except CatchedException as e:
                exporter.build({"all": [{
                    "type": "critical",
                    "message": e,
                }]})

        # Release documents if exporter supports it
        export = exporter.release(pack=pack)

        # Some exporters, like the logging one, won't return anything to output or write
        if export:
            if destination:
                # Write every document to files in destination directory
                files = write_documents(destination, export)
                for item in files:
                    msg = "Created file: {}".format(item)
                    logger.info(msg)
            else:
                # Print out document
                for doc in export:
                    click.echo(doc["content"])
    # Don't validate anything, just list the paths
    else:
        logger.debug("Listing available paths from sitemap")

        # Count digits from total path counter
        digits = len(str(len(reduced_paths)))

        for i, item in enumerate(reduced_paths, start=1):
            # Left-pad the index number with zero(s)
            indice = str(i).rjust(digits, "0")
            logger.info("{}) {}".format(indice, item))
Example #12
def contracts(general_config, actor_options, mode, activate, gas, ignore_deployed, confirmations, parameters):
    """
    Compile and deploy contracts.
    """
    # Init

    emitter = general_config.emitter
    ADMINISTRATOR, _, deployer_interface, local_registry = actor_options.create_actor(emitter)
    chain_name = deployer_interface.client.chain_name

    deployment_parameters = {}
    if parameters:
        with open(parameters) as json_file:
            deployment_parameters = json.load(json_file)

    #
    # Deploy Single Contract (Amend Registry)
    #
    contract_name = actor_options.contract_name
    deployment_mode = constants.__getattr__(mode.upper())  # TODO: constant sorrow
    if contract_name:
        try:
            contract_deployer_class = ADMINISTRATOR.deployers[contract_name]
        except KeyError:
            message = f"No such contract {contract_name}. Available contracts are {ADMINISTRATOR.deployers.keys()}"
            emitter.echo(message, color='red', bold=True)
            raise click.Abort()

        if activate:
            # For the moment, only StakingEscrow can be activated
            staking_escrow_deployer = contract_deployer_class(registry=ADMINISTRATOR.registry,
                                                              deployer_address=ADMINISTRATOR.deployer_address)
            if contract_name != STAKING_ESCROW_CONTRACT_NAME or not staking_escrow_deployer.ready_to_activate:
                raise click.BadOptionUsage(option_name="--activate",
                                           message=f"You can only activate an idle instance of {STAKING_ESCROW_CONTRACT_NAME}")

            click.confirm(f"Activate {STAKING_ESCROW_CONTRACT_NAME} at "
                          f"{staking_escrow_deployer._get_deployed_contract().address}?", abort=True)

            receipts = staking_escrow_deployer.activate()
            for tx_name, receipt in receipts.items():
                paint_receipt_summary(emitter=emitter,
                                      receipt=receipt,
                                      chain_name=chain_name,
                                      transaction_type=tx_name)
            return  # Exit

        # Deploy
        emitter.echo(f"Deploying {contract_name}")
        if contract_deployer_class._upgradeable and deployment_mode is not BARE:
            # NOTE: Bare deployments do not engage the proxy contract
            secret = ADMINISTRATOR.collect_deployment_secret(deployer=contract_deployer_class)
            receipts, agent = ADMINISTRATOR.deploy_contract(contract_name=contract_name,
                                                            plaintext_secret=secret,
                                                            gas_limit=gas,
                                                            deployment_mode=deployment_mode,
                                                            ignore_deployed=ignore_deployed,
                                                            confirmations=confirmations,
                                                            deployment_parameters=deployment_parameters)
        else:
            # Non-Upgradeable or Bare
            receipts, agent = ADMINISTRATOR.deploy_contract(contract_name=contract_name,
                                                            gas_limit=gas,
                                                            deployment_mode=deployment_mode,
                                                            ignore_deployed=ignore_deployed,
                                                            confirmations=confirmations,
                                                            deployment_parameters=deployment_parameters)

        # Report
        paint_contract_deployment(contract_name=contract_name,
                                  contract_address=agent.contract_address,
                                  receipts=receipts,
                                  emitter=emitter,
                                  chain_name=chain_name,
                                  open_in_browser=actor_options.etherscan)
        return  # Exit

    #
    # Deploy Automated Series (Create Registry)
    #
    if deployment_mode is not FULL:
        raise click.BadOptionUsage(option_name='--mode',
                                   message="Only 'full' mode is supported when deploying all network contracts")

    # Confirm filesystem registry writes.
    if os.path.isfile(local_registry.filepath):
        emitter.echo(f"\nThere is an existing contract registry at {local_registry.filepath}.\n"
                     f"Did you mean 'nucypher-deploy upgrade'?\n", color='yellow')
        click.confirm("*DESTROY* existing local registry and continue?", abort=True)
        os.remove(local_registry.filepath)

    # Stage Deployment
    secrets = ADMINISTRATOR.collect_deployment_secrets()
    paint_staged_deployment(deployer_interface=deployer_interface, administrator=ADMINISTRATOR, emitter=emitter)

    # Confirm Trigger Deployment
    if not confirm_deployment(emitter=emitter, deployer_interface=deployer_interface):
        raise click.Abort()

    # Delay - Last chance to abort via KeyboardInterrupt
    paint_deployment_delay(emitter=emitter)

    # Execute Deployment
    deployment_receipts = ADMINISTRATOR.deploy_network_contracts(secrets=secrets,
                                                                 emitter=emitter,
                                                                 interactive=not actor_options.force,
                                                                 etherscan=actor_options.etherscan,
                                                                 ignore_deployed=ignore_deployed)

    # Paint outfile paths
    registry_outfile = local_registry.filepath
    emitter.echo('Generated registry {}'.format(registry_outfile), bold=True, color='blue')

    # Save transaction metadata
    receipts_filepath = ADMINISTRATOR.save_deployment_receipts(receipts=deployment_receipts)
    emitter.echo(f"Saved deployment receipts to {receipts_filepath}", color='blue', bold=True)
Example #13
def cli(ctx,
        amount,
        index,
        stage,
        stepresult,
        formattype,
        select,
        where,
        order,
        outputfile,
        showkeys,
        showvalues,
        showalways,
        position):
    """Export from memory to format supported by tablib"""
    if not ctx.bubble:
        msg = 'There is no bubble present, will not export'
        ctx.say_yellow(msg)
        raise click.Abort()
    path = ctx.home + '/'

    if stage not in STAGES:
        ctx.say_yellow('There is no known stage:' + stage)
        raise click.Abort()
    if stepresult not in exportables:
        ctx.say_yellow('stepresult not one of: ' + ', '.join(exportables))
        raise click.Abort()

    data_gen = bubble_lod_load(ctx, stepresult, stage)

    ctx.gbc.say('data_gen:', stuff=data_gen, verbosity=20)

    part = get_gen_slice(ctx.gbc, data_gen, amount, index)
    ctx.gbc.say('selected part:', stuff=part, verbosity=20)

    aliases = get_pairs(ctx.gbc, select, missing_colon=True)
    if position or len(aliases) == 0:
        ctx.gbc.say('adding position to selection of columns:',
                    stuff=aliases, verbosity=20)
        aliases.insert(0, {'key': buts('index'), 'val': 'BUBBLE_IDX'})
        ctx.gbc.say('added position to selection of columns:',
                    stuff=aliases, verbosity=20)

    wheres = get_pairs(ctx.gbc, where)
    # TODO: use aliases as lookup for wheres

    data = tablib.Dataset()

    data.headers = [sel['val'] for sel in aliases]
    ctx.gbc.say('select wheres:' + str(wheres), verbosity=20)
    ctx.gbc.say('select aliases:' + str(aliases), verbosity=20)
    ctx.gbc.say('select data.headers:' + str(data.headers), verbosity=20)

    not_shown = True
    try:
        for ditem in part:
            row = []
            ctx.gbc.say('curr dict', stuff=ditem, verbosity=101)

            flitem = flat(ctx, ditem)
            ctx.gbc.say('curr flat dict', stuff=flitem, verbosity=101)
            row_ok = True
            for wp in wheres:
                # TODO: negative selects: k:None, k:False,k:Zero,k:Null,k:0,k:-1,k:'',k:"",
                # TODO: negative selects:
                # k:BUBBLE_NO_KEY,k:BUBBLE_NO_VAL,k:BUBBLE_NO_KEY_OR_NO_VAL

                if not wp['val'] in str(flitem[wp['key']]):
                    row_ok = False
            if not row_ok:
                continue

            for sel in aliases:
                if sel['key'] in flitem:
                    row.append(flitem[sel['key']])
                else:
                    # temporary to check, not use case for buts()
                    bnp = '____BTS_NO_PATH_'
                    tempv = get_flat_path(ctx, flitem, sel['key'] + '.*', bnp)
                    if tempv != bnp:
                        row.append(tempv)
                    else:
                        row.append('None')
                        # TODO maybe 'NONE', or just '' or something like:
                        # magic.export_format_none

            data.append(row)
            # todo: count keys, and show all keys in selection: i,a
            if not_shown and showkeys:
                if not showalways:
                    not_shown = False
                ks = list(flitem.keys())
                ks.sort()
                ctx.say(
                    'available dict path keys from first selected dict:', verbosity=0)
                for k in ks:
                    ctx.say('keypath: ' + k, verbosity=0)
                    if showvalues:
                        ctx.say('value: ' + str(flitem[k]) + '\n', verbosity=0)

    except Exception as excpt:
        ctx.say_red('Cannot export data', stuff=excpt)
        raise click.Abort()

    if not outputfile:
        outputfile = path + 'export/export_' + \
            stepresult + '_' + stage + '.' + formattype

    # todo: order key must be present in selection
    # add to selection before
    # and remove from result before output to format.
    if order:
        olast2 = order[-2:]
        ctx.gbc.say('order:' + order + ' last2:' + olast2, verbosity=100)
        if olast2 not in [':+', ':-']:
            data = data.sort(order, False)
        else:
            if olast2 == ':+':
                data = data.sort(order[:-2], False)
            if olast2 == ':-':
                data = data.sort(order[:-2], True)

    # Write `spreadsheet` to disk
    formatted = None
    if formattype == 'yaml':
        formatted = data.yaml
    if formattype == 'json':
        formatted = data.json
    if formattype == 'csv':
        formatted = data.csv

    # TODO:
    # if formattype == 'ldif':
    #    formatted = data.ldif

    if formattype == 'tab':
        # standard, output, whatever tablib makes of it, ascii table
        print(data)

    if formatted:
        enc_formatted = formatted.encode('utf-8')
        of_path = opath.Path(outputfile)
        of_dir = of_path.dirname()
        if not of_dir.exists():
            of_dir.makedirs_p()

        with open(outputfile, 'wb') as f:
            f.write(enc_formatted)
            ctx.say_green('exported: ' + outputfile)
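
A minimal tablib sketch (not part of the bubble CLI above) showing the Dataset calls the exporter relies on; the headers and rows here are made up:

import tablib

data = tablib.Dataset()
data.headers = ['BUBBLE_IDX', 'name']
data.append([0, 'beta'])
data.append([1, 'alpha'])

data = data.sort('name', False)   # ascending, mirroring the ':+' order suffix
print(data.csv)                   # data.json works the same way; data.yaml
                                  # needs the yaml extra in recent tablib
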
Example #14
0
def variants(
    case_id,
    institute,
    cancer,
    cancer_sv,
    cancer_research,
    cancer_sv_research,
    sv,
    sv_research,
    snv,
    snv_research,
    str_clinical,
    chrom,
    start,
    end,
    hgnc_id,
    hgnc_symbol,
    rank_treshold,
    force,
    keep_actions,
):
    """Upload variants to a case

    Note that the files have to be linked with the case;
    if they are not, use 'scout update case'.
    """
    LOG.info("Running scout load variants")
    adapter = store

    if institute:
        case_id = "{0}-{1}".format(institute, case_id)
    else:
        institute = case_id.split("-")[0]
    case_obj = adapter.case(case_id=case_id)
    if case_obj is None:
        LOG.info("No matching case found")
        raise click.Abort()

    institute_id = case_obj["owner"]
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        LOG.info("Institute %s does not exist", institute_id)
        raise click.Abort()

    files = [
        {
            "category": "cancer",
            "variant_type": "clinical",
            "upload": cancer
        },
        {
            "category": "cancer_sv",
            "variant_type": "clinical",
            "upload": cancer_sv
        },
        {
            "category": "cancer",
            "variant_type": "research",
            "upload": cancer_research
        },
        {
            "category": "cancer_sv",
            "variant_type": "research",
            "upload": cancer_sv_research,
        },
        {
            "category": "sv",
            "variant_type": "clinical",
            "upload": sv
        },
        {
            "category": "sv",
            "variant_type": "research",
            "upload": sv_research
        },
        {
            "category": "snv",
            "variant_type": "clinical",
            "upload": snv
        },
        {
            "category": "snv",
            "variant_type": "research",
            "upload": snv_research
        },
        {
            "category": "str",
            "variant_type": "clinical",
            "upload": str_clinical
        },
    ]

    gene_obj = None
    if hgnc_id or hgnc_symbol:
        if hgnc_id:
            gene_obj = adapter.hgnc_gene(hgnc_id, case_obj["genome_build"])
        if hgnc_symbol:
            for res in adapter.gene_aliases(hgnc_symbol,
                                            case_obj["genome_build"]):
                gene_obj = res
        if not gene_obj:
            LOG.warning("The gene could not be found")
            raise click.Abort()

    old_sanger_variants = adapter.case_sanger_variants(case_obj["_id"])
    old_evaluated_variants = None  # acmg, manual rank, cancer tier, dismissed, mosaic, commented

    if keep_actions:  # collect all variants with user actions for this case
        old_evaluated_variants = list(
            adapter.evaluated_variants(case_id, institute_id))

    i = 0
    for file_type in files:
        variant_type = file_type["variant_type"]
        category = file_type["category"]

        if not file_type["upload"]:
            continue

        i += 1
        if variant_type == "research":
            if not (force or case_obj["research_requested"]):
                LOG.warning("research not requested, use '--force'")
                raise click.Abort()

        LOG.info("Delete {0} {1} variants for case {2}".format(
            variant_type, category, case_id))

        adapter.delete_variants(case_id=case_obj["_id"],
                                variant_type=variant_type,
                                category=category)

        LOG.info("Load {0} {1} variants for case {2}".format(
            variant_type, category, case_id))

        try:
            adapter.load_variants(
                case_obj=case_obj,
                variant_type=variant_type,
                category=category,
                rank_threshold=rank_treshold,
                chrom=chrom,
                start=start,
                end=end,
                gene_obj=gene_obj,
                build=case_obj["genome_build"],
            )
            # Update case variants count
            adapter.case_variants_count(case_obj["_id"],
                                        institute_id,
                                        force_update_case=True)

        except Exception as e:
            LOG.warning(e)
            raise click.Abort()

    if i == 0:
        LOG.info("No files where specified to upload variants from")
        return

    # update Sanger status for the new inserted variants
    sanger_updated = adapter.update_case_sanger_variants(
        institute_obj, case_obj, old_sanger_variants)

    if keep_actions and old_evaluated_variants:
        adapter.update_variant_actions(institute_obj, case_obj,
                                       old_evaluated_variants)
Example #15
0
def show(name):
    """Show a config value."""
    val = getattr(config_lib, name.upper(), None)
    if val is None:
        # click.Abort() does not display its argument; echo the error first
        click.echo(f"No config value named {name}", err=True)
        raise click.Abort()
    click.echo(val)
Example #16
0
def bond(registry_filepath, eth_provider_uri, signer_uri, operator_address,
         staking_provider, network, force):
    """
    Bond an operator to a staking provider.
    The staking provider must be authorized to use the PREApplication.
    """

    #
    # Setup
    #

    emitter = StdoutEmitter()
    connect_to_blockchain(eth_provider_uri=eth_provider_uri, emitter=emitter)
    if not signer_uri:
        emitter.message('--signer is required', color='red')
        raise click.Abort()
    if not network:
        network = select_network(emitter=emitter,
                                 network_type=NetworksInventory.ETH)

    signer = Signer.from_signer_uri(signer_uri)
    transacting_power = TransactingPower(account=staking_provider,
                                         signer=signer)
    registry = get_registry(network=network,
                            registry_filepath=registry_filepath)
    agent = ContractAgency.get_agent(PREApplicationAgent, registry=registry)

    #
    # Checks
    #

    # Check for authorization
    is_authorized(emitter=emitter,
                  agent=agent,
                  staking_provider=staking_provider)

    # Check bonding
    if is_bonded(agent=agent,
                 staking_provider=staking_provider,
                 return_address=False):
        # operator is already set - check timing
        check_bonding_requirements(emitter=emitter,
                                   agent=agent,
                                   staking_provider=staking_provider)

    # Check for pre-existing staking providers for this operator
    onchain_staking_provider = agent.get_staking_provider_from_operator(
        operator_address=operator_address)
    if onchain_staking_provider != NULL_ADDRESS:
        emitter.message(ALREADY_BONDED.format(
            provider=onchain_staking_provider, operator=operator_address),
                        color='red')
        raise click.Abort()  # dont steal bananas

    # Check that operator is not human
    if staking_provider != operator_address:
        # if the operator has a beneficiary it is the staking provider.
        beneficiary = agent.get_beneficiary(staking_provider=operator_address)
        if beneficiary != NULL_ADDRESS:
            emitter.message(UNEXPECTED_HUMAN_OPERATOR, color='red')
            raise click.Abort()

    #
    # Bond
    #

    if not force:
        click.confirm(CONFIRM_BONDING.format(provider=staking_provider,
                                             operator=operator_address),
                      abort=True)
    transacting_power.unlock(password=get_client_password(
        checksum_address=staking_provider,
        envvar=NUCYPHER_ENVVAR_STAKING_PROVIDER_ETH_PASSWORD))
    emitter.echo(BONDING.format(operator=operator_address))
    receipt = agent.bond_operator(operator=operator_address,
                                  transacting_power=transacting_power,
                                  staking_provider=staking_provider)
    paint_receipt_summary(receipt=receipt, emitter=emitter)
Example #17
0
 def abort(self, message):
     click.echo('Error: %s' % message, err=True)
     raise click.Abort()
Example #18
0
def cleanup(days, project, concurrency, silent, model, router):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted.  The default for
    this is 30 days.  In the default setting all projects will be truncated
    but if you have a specific project you want to limit this to this can be
    done with the `--project` flag which accepts a project ID or a string
    with the form `org/project` where both are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    from threading import Thread
    from django.db import router as db_router
    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry.models import (
        ApiGrant,
        ApiToken,
        Event,
        EventMapping,
        Group,
        GroupRuleStatus,
        GroupTagValue,
        LostPasswordHash,
        TagValue,
        GroupEmailThread,
        FileBlob,
    )

    models = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not models:
            return False
        return model.__name__.lower() not in models

    # these models should be safe to delete without cascades, in order
    BULK_DELETES = (
        (GroupRuleStatus, 'date_added'),
        (GroupTagValue, 'last_seen'),
        (TagValue, 'last_seen'),
        (GroupEmailThread, 'date'),
    )

    GENERIC_DELETES = (
        (Event, 'datetime'),
        (Group, 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        LostPasswordHash.objects.filter(date_added__lte=timezone.now() -
                                        timedelta(hours=48)).delete()

    for model in [ApiGrant, ApiToken]:
        if not silent:
            click.echo('Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection",
            err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")

        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation",
                err=True)

    for model, dtfield in BULK_DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
            ).execute()

    # EventMapping is fairly expensive and is special cased as it's likely you
    # won't need a reference to an event for nearly as long
    if not silent:
        click.echo("Removing expired values for EventMapping")
    if is_filtered(EventMapping):
        if not silent:
            click.echo('>> Skipping EventMapping')
    else:
        BulkDeleteQuery(
            model=EventMapping,
            dtfield='date_added',
            days=min(days, 7),
            project_id=project_id,
        ).execute()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    for model, dtfield in GENERIC_DELETES:
        if not silent:
            click.echo(
                "Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                ))
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            query = BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
            )
            if concurrency > 1:
                threads = []
                for shard_id in range(concurrency):
                    t = Thread(
                        target=lambda shard_id=shard_id: query.execute_sharded(
                            concurrency, shard_id))
                    t.start()
                    threads.append(t)

                for t in threads:
                    t.join()
            else:
                query.execute_generic()
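
A minimal, self-contained sketch of the --model/--router filtering rule used above; the Django router is stubbed out with a lambda and the model classes are placeholders:

def make_filter(model_names, router, db_for_write):
    models = {m.lower() for m in model_names}

    def is_filtered(model):
        if router is not None and db_for_write(model) != router:
            return True
        if not models:
            return False
        return model.__name__.lower() not in models

    return is_filtered


class Group: pass
class Event: pass

is_filtered = make_filter(['group'], None, lambda model: 'default')
assert not is_filtered(Group)   # explicitly requested via --model
assert is_filtered(Event)       # not in the --model list, so skipped
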
Example #19
0
def sample(ctx, files, bidx):
    """Sample a dataset at one or more points

    Sampling points (x, y) encoded as JSON arrays, in the coordinate
    reference system of the dataset, are read from the second
    positional argument or stdin. Values of the dataset's bands
    are also encoded as JSON arrays and are written to stdout.

    Example:

    \b
        $ cat << EOF | rio sample tests/data/RGB.byte.tif
        > [220650, 2719200]
        > [219650, 2718200]
        > EOF
        [28, 29, 27]
        [25, 29, 19]

    By default, rio-sample will sample all bands. Optionally, bands
    may be specified using a simple syntax:

      --bidx N samples the Nth band (first band is 1).

      --bidx M,N,O samples bands M, N, and O.

      --bidx M..O samples bands M-O, inclusive.

      --bidx ..N samples all bands up to and including N.

      --bidx N.. samples all bands from N to the end.

    Example:

    \b
        $ cat << EOF | rio sample tests/data/RGB.byte.tif --bidx ..2
        > [220650, 2719200]
        > [219650, 2718200]
        > EOF
        [28, 29]
        [25, 29]

    """
    verbosity = (ctx.obj and ctx.obj.get('verbosity')) or 1
    logger = logging.getLogger('rio')

    files = list(files)
    source = files.pop(0)
    input = files.pop(0) if files else '-'

    # Handle the case of file, stream, or string input.
    try:
        points = click.open_file(input).readlines()
    except IOError:
        points = [input]

    try:
        with rasterio.drivers(CPL_DEBUG=verbosity > 2):
            with rasterio.open(source) as src:
                if bidx is None:
                    indexes = src.indexes
                elif '..' in bidx:
                    start, stop = map(lambda x: int(x) if x else None,
                                      bidx.split('..'))
                    if start is None:
                        start = 1
                    indexes = src.indexes[slice(start - 1, stop)]
                else:
                    indexes = list(map(int, bidx.split(',')))
                for vals in src.sample((json.loads(line) for line in points),
                                       indexes=indexes):
                    click.echo(json.dumps(vals.tolist()))

    except Exception:
        logger.exception("Exception caught during processing")
        raise click.Abort()
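
A standalone sketch of the --bidx parsing shown above, assuming a source dataset whose band indexes are (1, 2, 3); it is not part of rio itself:

def parse_bidx(bidx, src_indexes=(1, 2, 3)):
    # None means "all bands"; 'M..O' is an inclusive range; 'M,N' is a list.
    if bidx is None:
        return list(src_indexes)
    if '..' in bidx:
        start, stop = map(lambda x: int(x) if x else None, bidx.split('..'))
        if start is None:
            start = 1
        return list(src_indexes[slice(start - 1, stop)])
    return list(map(int, bidx.split(',')))


assert parse_bidx('..2') == [1, 2]
assert parse_bidx('2..') == [2, 3]
assert parse_bidx('1,3') == [1, 3]
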
Example #20
0
def cleanup(days, project, concurrency, silent, model, router, timed):
    """Delete a portion of trailing data based on creation date.

    All data that is older than `--days` will be deleted.  The default for
    this is 30 days.  In the default setting all projects will be truncated
    but if you have a specific project you want to limit this to this can be
    done with the `--project` flag which accepts a project ID or a string
    with the form `org/project` where both are slugs.
    """
    if concurrency < 1:
        click.echo('Error: Minimum concurrency is 1', err=True)
        raise click.Abort()

    # Make sure we fork off multiprocessing pool
    # before we import or configure the app
    from multiprocessing import Process, JoinableQueue as Queue

    pool = []
    task_queue = Queue(1000)
    for _ in xrange(concurrency):
        p = Process(target=multiprocess_worker, args=(task_queue,))
        p.daemon = True
        p.start()
        pool.append(p)

    from sentry.runner import configure
    configure()

    from django.db import router as db_router
    from sentry.app import nodestore
    from sentry.db.deletion import BulkDeleteQuery
    from sentry import models

    if timed:
        import time
        from sentry.utils import metrics
        start_time = time.time()

    # list of models which this query is restricted to
    model_list = {m.lower() for m in model}

    def is_filtered(model):
        if router is not None and db_router.db_for_write(model) != router:
            return True
        if not model_list:
            return False
        return model.__name__.lower() not in model_list

    # Deletions that use `BulkDeleteQuery` (and don't need to worry about child relations)
    # (model, datetime_field, order_by)
    BULK_QUERY_DELETES = [
        (models.EventMapping, 'date_added', '-date_added'),
        (models.EventAttachment, 'date_added', None),
        (models.UserReport, 'date_added', None),
        (models.GroupHashTombstone, 'deleted_at', None),
        (models.GroupEmailThread, 'date', None),
        (models.GroupRuleStatus, 'date_added', None),
    ] + EXTRA_BULK_QUERY_DELETES

    # Deletions that use the `deletions` code path (which handles their child relations)
    # (model, datetime_field, order_by)
    DELETES = (
        (models.Event, 'datetime', 'datetime'),
        (models.Group, 'last_seen', 'last_seen'),
    )

    if not silent:
        click.echo('Removing expired values for LostPasswordHash')

    if is_filtered(models.LostPasswordHash):
        if not silent:
            click.echo('>> Skipping LostPasswordHash')
    else:
        models.LostPasswordHash.objects.filter(
            date_added__lte=timezone.now() - timedelta(hours=48)
        ).delete()

    for model in [models.ApiGrant, models.ApiToken]:
        if not silent:
            click.echo(u'Removing expired values for {}'.format(model.__name__))

        if is_filtered(model):
            if not silent:
                click.echo(u'>> Skipping {}'.format(model.__name__))
        else:
            model.objects.filter(expires_at__lt=timezone.now()).delete()

    project_id = None
    if project:
        click.echo(
            "Bulk NodeStore deletion not available for project selection", err=True)
        project_id = get_project(project)
        if project_id is None:
            click.echo('Error: Project not found', err=True)
            raise click.Abort()
    else:
        if not silent:
            click.echo("Removing old NodeStore values")

        cutoff = timezone.now() - timedelta(days=days)
        try:
            nodestore.cleanup(cutoff)
        except NotImplementedError:
            click.echo(
                "NodeStore backend does not support cleanup operation", err=True)

    for bqd in BULK_QUERY_DELETES:
        if len(bqd) == 4:
            model, dtfield, order_by, chunk_size = bqd
        else:
            chunk_size = 10000
            model, dtfield, order_by = bqd

        if not silent:
            click.echo(
                u"Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )
        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            ).execute(chunk_size=chunk_size)

    for model, dtfield, order_by in DELETES:
        if not silent:
            click.echo(
                u"Removing {model} for days={days} project={project}".format(
                    model=model.__name__,
                    days=days,
                    project=project or '*',
                )
            )

        if is_filtered(model):
            if not silent:
                click.echo('>> Skipping %s' % model.__name__)
        else:
            imp = '.'.join((model.__module__, model.__name__))

            q = BulkDeleteQuery(
                model=model,
                dtfield=dtfield,
                days=days,
                project_id=project_id,
                order_by=order_by,
            )

            for chunk in q.iterator(chunk_size=100):
                task_queue.put((imp, chunk))

            task_queue.join()

    # Clean up FileBlob instances which are no longer used and aren't super
    # recent (as there could be a race between blob creation and reference)
    if not silent:
        click.echo("Cleaning up unused FileBlob references")
    if is_filtered(models.FileBlob):
        if not silent:
            click.echo('>> Skipping FileBlob')
    else:
        cleanup_unused_files(silent)

    # Shut down our pool
    for _ in pool:
        task_queue.put(_STOP_WORKER)

    # And wait for it to drain
    for p in pool:
        p.join()

    if timed:
        duration = int(time.time() - start_time)
        metrics.timing('cleanup.duration', duration, instance=router)
        click.echo("Clean up took %s second(s)." % duration)
Example #21
0
        if not institute:
            click.echo("Please specify the owner of the case that should be "
                       "deleted with flag '-i/--institute'.")
            raise click.Abort()

    LOG.info("Running deleting case {0}".format(case_id))
    case = adapter.delete_case(case_id=case_id,
                               institute_id=institute,
                               display_name=display_name)

    if case.deleted_count == 1:
        adapter.delete_variants(case_id=case_id, variant_type='clinical')
        adapter.delete_variants(case_id=case_id, variant_type='research')
    else:
        LOG.warning("Case does not exist in database")
        raise click.Abort()


# @click.command('diseases', short_help='Display all diseases')
# @click.pass_context
# def diseases(context):
#     """Show all diseases in the database"""
#     LOG.info("Running scout view diseases")
#     adapter = context.obj['adapter']
#
#     click.echo("Disease")
#     for disease_obj in adapter.disease_terms():
#         click.echo("{0}:{1}".format(
#             disease_obj['source'],
#             disease_obj['disease_id'],
#         ))
Example #22
0
def case(
    context,
    case_id,
    case_name,
    institute,
    collaborator,
    vcf,
    vcf_sv,
    vcf_cancer,
    vcf_cancer_sv,
    vcf_research,
    vcf_sv_research,
    vcf_cancer_research,
    vcf_cancer_sv_research,
    reupload_sv,
    rankscore_treshold,
    rankmodel_version,
    sv_rankmodel_version,
):
    """
    Update a case in the database
    """
    adapter = store

    if not case_id:
        if not (case_name and institute):
            LOG.info(
                "Please specify either a case ID or both case name and institute ID for the case that should be updated."
            )
            raise click.Abort()

    # Check if the case exists
    case_obj = adapter.case(case_id=case_id,
                            institute_id=institute,
                            display_name=case_name)

    if not case_obj:
        LOG.warning("Case %s could not be found", case_id)
        context.abort()

    case_changed = False
    if collaborator:
        if not adapter.institute(collaborator):
            LOG.warning("Institute %s could not be found", collaborator)
            return
        if not collaborator in case_obj["collaborators"]:
            case_changed = True
            case_obj["collaborators"].append(collaborator)
            LOG.info("Adding collaborator %s", collaborator)

    if vcf:
        LOG.info("Updating 'vcf_snv' to %s", vcf)
        case_obj["vcf_files"]["vcf_snv"] = vcf
        case_changed = True
    if vcf_sv:
        LOG.info("Updating 'vcf_sv' to %s", vcf_sv)
        case_obj["vcf_files"]["vcf_sv"] = vcf_sv
        case_changed = True
    if vcf_cancer:
        LOG.info("Updating 'vcf_cancer' to %s", vcf_cancer)
        case_obj["vcf_files"]["vcf_cancer"] = vcf_cancer
        case_changed = True
    if vcf_cancer_sv:
        LOG.info("Updating 'vcf_cancer_sv' to %s", vcf_cancer_sv)
        case_obj["vcf_files"]["vcf_cancer_sv"] = vcf_cancer_sv
        case_changed = True
    if vcf_research:
        LOG.info("Updating 'vcf_research' to %s", vcf_research)
        case_obj["vcf_files"]["vcf_research"] = vcf_research
        case_changed = True
    if vcf_sv_research:
        LOG.info("Updating 'vcf_sv_research' to %s", vcf_sv_research)
        case_obj["vcf_files"]["vcf_sv_research"] = vcf_sv_research
        case_changed = True
    if vcf_cancer_research:
        LOG.info("Updating 'vcf_cancer_research' to %s", vcf_cancer_research)
        case_obj["vcf_files"]["vcf_cancer_research"] = vcf_cancer_research
        case_changed = True
    if vcf_cancer_sv_research:
        LOG.info("Updating 'vcf_cancer_sv_research' to %s",
                 vcf_cancer_sv_research)
        case_obj["vcf_files"][
            "vcf_cancer_sv_research"] = vcf_cancer_sv_research
        case_changed = True

    if case_changed:
        adapter.update_case(case_obj)

    if reupload_sv:
        LOG.info("Set needs_check to True for case %s", case_id)
        updates = {"needs_check": True}
        if sv_rankmodel_version:
            updates["sv_rank_model_version"] = str(sv_rankmodel_version)
        if vcf_sv:
            updates["vcf_files.vcf_sv"] = vcf_sv
        if vcf_sv_research:
            updates["vcf_files.vcf_sv_research"] = vcf_sv_research

        updated_case = adapter.case_collection.find_one_and_update(
            {"_id": case_id},
            {"$set": updates},
            return_document=pymongo.ReturnDocument.AFTER,
        )
        rankscore_treshold = rankscore_treshold or updated_case.get(
            "rank_score_threshold", 5)
        # Delete and reload the clinical SV variants
        if updated_case["vcf_files"].get("vcf_sv"):
            adapter.delete_variants(case_id,
                                    variant_type="clinical",
                                    category="sv")
            adapter.load_variants(
                updated_case,
                variant_type="clinical",
                category="sv",
                rank_threshold=int(rankscore_treshold),
            )
        # Delete and reload research SV variants
        if updated_case["vcf_files"].get("vcf_sv_research"):
            adapter.delete_variants(case_id,
                                    variant_type="research",
                                    category="sv")
            if updated_case.get("is_research"):
                adapter.load_variants(
                    updated_case,
                    variant_type="research",
                    category="sv",
                    rank_threshold=int(rankscore_treshold),
                )
        # Update case variants count
        adapter.case_variants_count(case_obj["_id"],
                                    case_obj["owner"],
                                    force_update_case=True)
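
A small sketch of the pymongo call used for the reupload_sv branch above: find_one_and_update with ReturnDocument.AFTER returns the document as it looks after the update. The connection string, database, collection, and file path are placeholders, not scout's actual configuration:

import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017")
cases = client["scout-demo"]["case"]   # placeholder database/collection names

updated_case = cases.find_one_and_update(
    {"_id": "internal_case_id"},
    {"$set": {"needs_check": True, "vcf_files.vcf_sv": "/path/to/sv.vcf.gz"}},
    return_document=pymongo.ReturnDocument.AFTER,
)
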
Example #23
0
def reset(unsafe, yes):
    """
    Destroys and recreates the Prefect Server database.

    WARNING: ALL DATA IS LOST.
    """

    if unsafe and yes:
        click.secho(
            "Not permitted to automatically disregard safety precautions. Use either --unsafe or --yes, not both.",
            bg="red",
            bold=True,
        )
        raise click.Abort()

    if not unsafe:
        # we only want to allow communication with a hasura/postgres instance that is definitely local
        # or probably in a docker-compose maintained container (where the hosts are named after the service names).
        # There is no definitive way to easily verify the docker container assumption, so it is enforced by
        # convention via a common service name.
        hasura_hosts = ("localhost", "hasura", "127.0.0.1")
        postgres_hosts = ("localhost", "postgres", "127.0.0.1")

        if config.hasura.host not in hasura_hosts:
            click.secho(
                "Failed safety check: bad 'hasura.host': '{}'\n  expected to be one of: {}".format(
                    config.hasura.host, ",".join(repr(c) for c in hasura_hosts),
                ),
                bg="red",
                bold=True,
            )
            raise click.Abort()

        # only allow graphql comms to approved hasura instances:
        # 1) over http (not https). Https is an indirect indication that this could be a GCP environment.
        # 2) the hasura host is one of the approved hasura instances
        found_valid_candidate = False
        valid_candidates = []
        for candidate in hasura_hosts:
            expected = "http://{}:{}".format(candidate, config.hasura.port)
            valid_candidates.append(expected)
            if expected in config.hasura.graphql_url:
                found_valid_candidate = True
                break

        if not found_valid_candidate:
            click.secho(
                "Failed safety check: bad 'hasura.graphql_url': '{}'\n  expected to contain one of: {}".format(
                    config.hasura.graphql_url,
                    ",".join(repr(c) for c in valid_candidates),
                ),
                bg="red",
                bold=True,
            )
            raise click.Abort()

        # only allow comms to an approved postgres instance. The `database.connection_url` is used as a default
        # for other config variables. This verification is validating the host and port of a database connection string
        # (only after the '@' host declaration)
        found_valid_candidate = False
        valid_candidates = []
        for candidate in postgres_hosts:
            expected = "@{}:5432".format(candidate)
            valid_candidates.append(expected)
            if expected in prefect_server.config.database.connection_url:
                found_valid_candidate = True
                break

        if not found_valid_candidate:
            click.secho(
                "Failed safety check: bad 'database.connection_url': '{}'\n  expected to contain one of: {}".format(
                    config.database.connection_url,
                    ",".join(repr(c) for c in valid_candidates),
                ),
                bg="red",
                bold=True,
            )
            raise click.Abort()

        # only allow hasura comms to an approved postgres instance. This verification is validating the host and
        # port of a database connection string (only after the '@' host declaration)
        found_valid_candidate = False
        valid_candidates = []
        for candidate in postgres_hosts:
            expected = "@{}:5432".format(candidate)
            valid_candidates.append(expected)
            if expected in config.hasura.db_url:
                found_valid_candidate = True
                break

        if not found_valid_candidate:
            click.secho(
                "Failed safety check: bad 'hasura.db_url': '{}'\n  expected to contain one of: {}".format(
                    config.hasura.db_url, ",".join(repr(c) for c in valid_candidates),
                ),
                bg="red",
                bold=True,
            )
            raise click.Abort()
    else:
        click.secho(
            "Warning: Elected to reset database WITHOUT safety precautions!\n",
            bg="red",
            bold=True,
        )

    if not yes:
        if not click.confirm(
            click.style(
                "Are you sure you want to reset the database?", bg="red", bold=True
            )
        ):
            return
    try:
        CliRunner().invoke(prefect_server.cli.hasura.clear_metadata)
        alembic_downgrade(apply_hasura_metadata=False)
        alembic_upgrade(apply_hasura_metadata=True)
        click.secho("\nDatabase reset!", fg="green")
    except Exception as e:
        click.secho("\nCould not reset the database!", bg="red", bold=True)
        raise click.ClickException(e)
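
The safety checks above all follow the same pattern: a config URL passes only if it contains one of a small set of expected local endpoints. A minimal sketch of that pattern, with illustrative names:

def url_is_local(url, hosts=("localhost", "postgres", "127.0.0.1"), port=5432):
    # Build "@host:port" candidates and accept the URL if any of them appears.
    candidates = ["@{}:{}".format(host, port) for host in hosts]
    return any(candidate in url for candidate in candidates)


assert url_is_local("postgresql://prefect:pw@localhost:5432/prefect")
assert not url_is_local("postgresql://prefect:pw@db.example.com:5432/prefect")
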
Example #24
0
def collect_year(year, timespan, n_months, force):
    """Extract the stats librarian for one year and store them in db.

    :param year: year of statistics
    :param n_months: month up to which the statistics are calculated
    :param timespan: time interval, can be 'montly' or 'yearly'
    :param force: force update of stat.
    """
    stat_pid = None
    type = 'librarian'
    if year:
        if timespan == 'montly':
            if n_months not in range(1, 13):
                click.secho('ERROR: not a valid month', fg='red')
                raise click.Abort()
            n_months += 1

            for month in range(1, n_months):
                first_day = f'{year}-{month:02d}-01T23:59:59'
                first_day = arrow.get(first_day, 'YYYY-MM-DDTHH:mm:ss')
                to_date = first_day + relativedelta(months=1)\
                    - relativedelta(days=1)
                _from = f'{to_date.year}-{to_date.month:02d}-01T00:00:00'
                _to = to_date.format(fmt='YYYY-MM-DDT23:59:59')

                date_range = {'from': _from, 'to': _to}

                _stats = StatsForLibrarian(to_date=to_date)

                stat_pid = _stats.get_stat_pid(type, date_range)

                if stat_pid and not force:
                    click.secho(f'ERROR: statistics of type {type}\
                                for time interval {_from} - {_to}\
                                already exist. Pid: {stat_pid}',
                                fg='red')
                    return

                stat_data = dict(type=type,
                                 date_range=date_range,
                                 values=_stats.collect())

                with current_app.app_context():
                    if stat_pid:
                        rec_stat = Stat.get_record_by_pid(stat_pid)
                        stat = rec_stat.update(data=stat_data,
                                               commit=True,
                                               dbcommit=True,
                                               reindex=True)
                        click.secho(f'WARNING: statistics of type {type}\
                                have been collected and updated\
                                for {year}-{month}.\
                                Pid: {stat.pid}',
                                    fg='yellow')
                    else:
                        stat = Stat.create(stat_data,
                                           dbcommit=True,
                                           reindex=True)
                        click.secho(
                            f'Statistics of type {type} have been collected\
                                and created for {year}-{month}.\
                                New pid: {stat.pid}',
                            fg='green')
        else:
            _from = arrow.get(f'{year}-01-01', 'YYYY-MM-DD')\
                         .format(fmt='YYYY-MM-DDT00:00:00')
            _to = arrow.get(f'{year}-12-31', 'YYYY-MM-DD')\
                       .format(fmt='YYYY-MM-DDT23:59:59')
            date_range = {'from': _from, 'to': _to}

            _stats = StatsForLibrarian()

            _stats.date_range = {'gte': _from, 'lte': _to}

            stat_pid = _stats.get_stat_pid(type, date_range)
            if stat_pid and not force:
                click.secho(f'ERROR: statistics of type {type}\
                        for time interval {_from} - {_to}\
                        already exist. Pid: {stat_pid}',
                            fg='red')
                return

            stat_data = dict(type=type,
                             date_range=date_range,
                             values=_stats.collect())

            with current_app.app_context():
                if stat_pid:
                    rec_stat = Stat.get_record_by_pid(stat_pid)
                    stat = rec_stat.update(data=stat_data,
                                           commit=True,
                                           dbcommit=True,
                                           reindex=True)
                    click.secho(f'WARNING: statistics of type {type}\
                            have been collected and updated for {year}.\
                            Pid: {stat.pid}',
                                fg='yellow')
                else:
                    stat = Stat.create(stat_data, dbcommit=True, reindex=True)
                    click.secho(
                        f'Statistics of type {type} have been collected and\
                            created for {year}.\
                            New pid: {stat.pid}',
                        fg='green')

        return
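
A small sketch of the month-boundary arithmetic used in the monthly branch above, assuming arrow and python-dateutil are installed; it computes the last day of a given month:

import arrow
from dateutil.relativedelta import relativedelta

first_day = arrow.get('2021-02-01T23:59:59', 'YYYY-MM-DDTHH:mm:ss')
to_date = first_day + relativedelta(months=1) - relativedelta(days=1)
print(to_date.format('YYYY-MM-DD'))   # 2021-02-28
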
Example #25
0
def _no_json(cfg):
    """
    Internal helper, marking this command as not allowing --json
    """
    if cfg.json:
        raise click.Abort("Doesn't accept --json flag")
Example #26
0
def variants(case_id, institute, force, cancer, cancer_research, sv,
             sv_research, snv, snv_research, str_clinical, chrom, start, end, hgnc_id,
             hgnc_symbol, rank_treshold):
    """Upload variants to a case

        Note that the files have to be linked with the case;
        if they are not, use 'scout update case'.
    """
    LOG.info("Running scout load variants")
    adapter = store

    if institute:
        case_id = "{0}-{1}".format(institute, case_id)
    else:
        institute = case_id.split('-')[0]
    case_obj = adapter.case(case_id=case_id)
    if case_obj is None:
        LOG.info("No matching case found")
        raise click.Abort()

    files = [
        {'category': 'cancer', 'variant_type': 'clinical', 'upload': cancer},
        {'category': 'cancer', 'variant_type': 'research', 'upload': cancer_research},
        {'category': 'sv', 'variant_type': 'clinical', 'upload': sv},
        {'category': 'sv', 'variant_type': 'research', 'upload': sv_research},
        {'category': 'snv', 'variant_type': 'clinical', 'upload': snv},
        {'category': 'snv', 'variant_type': 'research', 'upload': snv_research},
        {'category': 'str', 'variant_type': 'clinical', 'upload': str_clinical},
    ]

    gene_obj = None
    if (hgnc_id or hgnc_symbol):
        if hgnc_id:
            gene_obj = adapter.hgnc_gene(hgnc_id)
        if hgnc_symbol:
            for res in adapter.gene_by_alias(hgnc_symbol):
                gene_obj = res
        if not gene_obj:
            LOG.warning("The gene could not be found")
            raise click.Abort()

    i = 0
    for file_type in files:
        variant_type = file_type['variant_type']
        category = file_type['category']

        if file_type['upload']:
            i += 1
            if variant_type == 'research':
                if not (force or case_obj['research_requested']):
                    LOG.warn("research not requested, use '--force'")
                    raise click.Abort()

            LOG.info("Delete {0} {1} variants for case {2}".format(
                         variant_type, category, case_id))
            adapter.delete_variants(case_id=case_obj['_id'],
                                    variant_type=variant_type,
                                    category=category)

            LOG.info("Load {0} {1} variants for case {2}".format(
                         variant_type, category, case_id))

            try:
                adapter.load_variants(
                    case_obj=case_obj,
                    variant_type=variant_type,
                    category=category,
                    rank_threshold=rank_treshold,
                    chrom=chrom,
                    start=start,
                    end=end,
                    gene_obj=gene_obj
                )
            except Exception as e:
                LOG.warning(e)
                raise click.Abort()
    if i == 0:
        LOG.info("No files where specified to upload variants from")
Example #27
0
def push(cfg, ctx, quiet, apikey, update, overwrite, files):
    """Pushes metadata to a richard instance."""
    if not quiet:
        click.echo(VERSION)

    # Get username, api_url and api_key.

    username = get_from_config(cfg, 'username')
    api_url = get_from_config(cfg, 'api_url')

    # Command line api_key overrides config-set api_key
    if not apikey:
        try:
            apikey = cfg.get('project', 'api_key')
        except ConfigParser.NoOptionError:
            pass
    if not apikey:
        raise click.ClickException(
            u'Specify an api key either in {0}, on command line, '
            u'or in API_KEY file.'.format(get_project_config_file_name())
        )

    if not username or not api_url or not apikey:
        raise click.ClickException(u'Missing username, api_url or api_key.')

    data = load_json_files(cfg)

    if files:
        data = [(fn, contents) for fn, contents in data if fn in files]

    # There are two modes:
    #
    # 1. User set category in configuration. Then the json files can
    #    either have no category set or they have to have the same
    #    category set.
    #
    # 2. User has NOT set category in configuration. Then the json
    #    files must all have the category set. The categories can be
    #    different.
    #
    # Go through and make sure there aren't any problems with
    # categories.

    all_categories = dict(
        [(cat['title'], cat)
         for cat in steve.richardapi.get_all_categories(api_url)])

    try:
        category = cfg.get('project', 'category')
        category = category.strip()
        if category not in all_categories:
            raise click.ClickException(
                u'Category "{0}" does not exist on server. Build it there '
                u'first.'.format(category)
            )
        else:
            click.echo('Category {0} exists on site.'.format(category))
    except ConfigParser.NoOptionError:
        category = None

    errors = []
    for fn, contents in data:
        if category is None:
            this_cat = contents.get('category')
            if not this_cat:
                errors.append(
                    u'No category set in configuration and {0} has no '
                    u'category set.'.format(fn)
                )
            elif this_cat != this_cat.strip():
                errors.append(
                    u'Category "{0}" has whitespace at beginning or '
                    u'end.'.format(this_cat)
                )
            elif this_cat not in all_categories:
                errors.append(
                    u'Category "{0}" does not exist on server. '
                    u'Build it there first.'.format(this_cat)
                )

        else:
            this_cat = contents.get('category')
            if this_cat is not None and str(this_cat).strip() != category:
                errors.append(
                    u'Category set in configuration ({0}), but {1} has '
                    u'different category ({2}).'.format(category, fn, this_cat)
                )

    if update:
        for fn, contents in data:
            if 'id' not in contents:
                errors.append(
                    u'id not in contents for "{0}".'.format(fn)
                )

    if errors:
        raise click.ClickException('\n'.join(errors))

    # Everything looks ok. So double-check with the user and push.

    click.echo('Pushing to: {0}'.format(api_url))
    click.echo('Username:   {0}'.format(username))
    click.echo('api_key:    {0}'.format(apikey))
    click.echo('update?:    {0}'.format(update))
    click.echo('# videos:   {0}'.format(len(data)))
    click.echo('Once you push, you can not undo it. Push for realz? Y/N')
    if not raw_input().strip().lower().startswith('y'):
        raise click.Abort()

    for fn, contents in data:
        contents['category'] = category or contents.get('category')

        if not update:
            # Nix any id field since that causes problems.
            if 'id' in contents:
                if not overwrite:
                    click.echo(u'Skipping... already exists.')
                    continue
                del contents['id']

            click.echo('Pushing {0}'.format(fn))
            try:
                vid = steve.richardapi.create_video(api_url, apikey, contents)

                if 'id' in vid:
                    contents['id'] = vid['id']
                    click.echo('   Now has id {0}'.format(vid['id']))
                else:
                    click.echo('   Errors?: {0}'.format(vid), err=True)
            except steve.restapi.RestAPIException as exc:
                click.echo('   Error?: {0}'.format(exc), err=True)
                click.echo('   "{0}"'.format(exc.response.content), err=True)

        else:
            click.echo('Updating {0} "{1}" ({2})'.format(
                contents['id'], contents['title'], fn))
            try:
                vid = steve.richardapi.update_video(
                    api_url, apikey, contents['id'], contents)
            except steve.restapi.RestAPIException as exc:
                click.echo('   Error?: {0}'.format(exc), err=True)
                click.echo('   "{0}"'.format(exc.response.content), err=True)

        save_json_file(cfg, fn, contents)
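
The two category modes described in the comments above boil down to a small decision; a condensed sketch with plain strings instead of steve's config and JSON objects, using an illustrative category name:

def category_errors(config_category, file_category, server_categories):
    errors = []
    if config_category is None:
        # Mode 2: no category in configuration, each file must carry a valid one.
        if not file_category:
            errors.append('no category set in configuration or file')
        elif file_category not in server_categories:
            errors.append('category "%s" does not exist on server' % file_category)
    elif file_category is not None and file_category.strip() != config_category:
        # Mode 1: configured category wins; files may omit it or must match.
        errors.append('file category "%s" differs from configured "%s"'
                      % (file_category, config_category))
    return errors


assert category_errors(None, 'PyCon 2014', {'PyCon 2014'}) == []
assert category_errors('PyCon 2014', None, {'PyCon 2014'}) == []
assert category_errors(None, None, {'PyCon 2014'})   # reports an error
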
Example #28
0
def genes(build, api_key):
    """
    Load the hgnc aliases to the mongo database.
    """
    LOG.info("Running scout update genes")
    adapter = store

    # Fetch the omim information
    api_key = api_key or current_app.config.get("OMIM_API_KEY")
    mim_files = {}
    if not api_key:
        LOG.warning(
            "No omim api key provided, Please not that some information will be missing"
        )

    else:
        try:
            mim_files = fetch_mim_files(
                api_key, mim2genes=True, morbidmap=True, genemap2=True
            )
        except Exception as err:
            LOG.warning(err)
            raise click.Abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_genes_to_hpo_to_disease()

    if build:
        builds = [build]
    else:
        builds = ["37", "38"]

    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    for build in builds:
        ensembl_genes = fetch_ensembl_genes(build=build)

        # load the genes
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_genes,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim_files.get("mim2genes"),
            genemap_lines=mim_files.get("genemap2"),
            hpo_lines=hpo_genes,
            build=build,
        )

        ensembl_genes = {}
        for gene_obj in hgnc_genes:
            ensembl_id = gene_obj["ensembl_id"]
            ensembl_genes[ensembl_id] = gene_obj

        # Fetch the transcripts from ensembl
        ensembl_transcripts = fetch_ensembl_transcripts(build=build)

        transcripts = load_transcripts(
            adapter, ensembl_transcripts, build, ensembl_genes
        )

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
Example #29
0
def _parse_file(filename):
    """Get data from file"""
    defaults = {}
    notes = []
    note = {}
    codeblock = False
    field = None
    for line in open(filename, 'r'):
        if codeblock:
            if field:
                note['fields'][field] += line
            match = re.match(r'```\s*$', line)
            if match:
                codeblock = False
            continue

        match = re.match(r'```\w*\s*$', line)
        if match:
            codeblock = True
            if field:
                note['fields'][field] += line
            continue

        if not field:
            match = re.match(r'(\w+): (.*)', line)
            if match:
                k, v = match.groups()
                k = k.lower()
                if k == 'tag':
                    k = 'tags'
                note[k] = v.strip()
                continue

        match = re.match(r'(#+)\s*(.*)', line)
        if not match:
            if field:
                note['fields'][field] += line
            continue

        level, title = match.groups()

        if len(level) == 1:
            if note:
                if field:
                    note['fields'][field] = note['fields'][field].strip()
                    notes.append(note)
                else:
                    defaults.update(note)

            note = {'title': title, 'fields': {}}
            field = None
            continue

        if len(level) == 2:
            if field:
                note['fields'][field] = note['fields'][field].strip()

            if title in note:
                click.echo(f'Error when parsing {filename}!')
                raise click.Abort()

            field = title
            note['fields'][field] = ''

    if note and field:
        note['fields'][field] = note['fields'][field].strip()
        notes.append(note)

    return defaults, notes
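
A hypothetical input file for the parser above, inferred from its regular expressions: 'key: value' lines before the first heading become defaults, a '#' heading starts a new note, '##' headings start fields, and fenced code blocks are copied verbatim. The snippet assumes _parse_file (and its re/click imports) from the example above is in scope:

SAMPLE = """\
deck: Default
tags: python

# My first note

## Front
What does click.Abort() do?

## Back
It makes click print "Aborted!" and exit with status 1.
"""

with open('sample_note.md', 'w') as handle:
    handle.write(SAMPLE)

defaults, notes = _parse_file('sample_note.md')
print(defaults)                                      # {'deck': 'Default', 'tags': 'python'}
print(notes[0]['title'], sorted(notes[0]['fields']))  # My first note ['Back', 'Front']
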
Example #30
0
def upgrade_docker(container_name, url, cleanup_image, skip_check, tag, warm):
    """ Upgrade docker image from local binary or URL"""

    image_name = get_container_image_name(container_name)
    image_latest = image_name + ":latest"
    image_id_previous = get_container_image_id(image_latest)

    DEFAULT_IMAGE_PATH = os.path.join("/tmp/", image_name)
    if url.startswith('http://') or url.startswith('https://'):
        click.echo('Downloading image...')
        validate_url_or_abort(url)
        try:
            urllib.urlretrieve(url, DEFAULT_IMAGE_PATH, reporthook)
        except Exception as e:
            click.echo("Download error", e)
            raise click.Abort()
        image_path = DEFAULT_IMAGE_PATH
    else:
        image_path = os.path.join("./", url)

    # Verify that the local file exists and is a regular file
    # TODO: Verify the file is a *proper Docker image file*
    if not os.path.isfile(image_path):
        click.echo(
            "Image file '{}' does not exist or is not a regular file. Aborting..."
            .format(image_path))
        raise click.Abort()

    warm_configured = False
    # warm restart enable/disable config is put in stateDB, not persistent across cold reboot, not saved to config_DB.json file
    state_db = SonicV2Connector(host='127.0.0.1')
    state_db.connect(state_db.STATE_DB, False)
    TABLE_NAME_SEPARATOR = '|'
    prefix = 'WARM_RESTART_ENABLE_TABLE' + TABLE_NAME_SEPARATOR
    _hash = '{}{}'.format(prefix, container_name)
    if state_db.get(state_db.STATE_DB, _hash, "enable") == "true":
        warm_configured = True
    state_db.close(state_db.STATE_DB)

    if container_name == "swss" or container_name == "bgp" or container_name == "teamd":
        if warm_configured == False and warm:
            run_command("config warm_restart enable %s" % container_name)

    # Fetch tag of current running image
    tag_previous = get_docker_tag_name(image_latest)
    # Load the new image beforehand to shorten disruption time
    run_command("docker load < %s" % image_path)
    warm_app_names = []
    # Warm restart specific processing for the swss, bgp and teamd dockers.
    if warm_configured or warm:
        # make sure orchagent is in clean state if swss is to be upgraded
        if container_name == "swss":
            skipPendingTaskCheck = ""
            if skip_check:
                skipPendingTaskCheck = " -s"

            cmd = "docker exec -i swss orchagent_restart_check -w 2000 -r 5 " + skipPendingTaskCheck

            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
            (out, err) = proc.communicate()
            if proc.returncode != 0:
                if not skip_check:
                    click.echo(
                        "Orchagent is not in clean state, RESTARTCHECK failed")
                    # Restore the original config before exit
                    if not warm_configured and warm:
                        run_command("config warm_restart disable %s" %
                                    container_name)
                    # Clean the image loaded earlier
                    image_id_latest = get_container_image_id(image_latest)
                    run_command("docker rmi -f %s" % image_id_latest)
                    # Re-point latest tag to previous tag
                    run_command("docker tag %s:%s %s" %
                                (image_name, tag_previous, image_latest))

                    sys.exit(proc.returncode)
                else:
                    click.echo(
                        "Orchagent is not in clean state, upgrading it anyway")
            else:
                click.echo(
                    "Orchagent is in clean state and frozen for warm upgrade")

            warm_app_names = ["orchagent", "neighsyncd"]

        elif container_name == "bgp":
            # Kill bgpd to restart the bgp graceful restart procedure
            click.echo("Stopping bgp ...")
            run_command("docker exec -i bgp pkill -9 zebra")
            run_command("docker exec -i bgp pkill -9 bgpd")
            warm_app_names = ["bgp"]
            click.echo("Stopped  bgp ...")

        elif container_name == "teamd":
            click.echo("Stopping teamd ...")
            # Send USR1 signal to all teamd instances to stop them
            # It will prepare teamd for warm-reboot
            run_command("docker exec -i teamd pkill -USR1 teamd > /dev/null")
            warm_app_names = ["teamsyncd"]
            click.echo("Stopped  teamd ...")

        # Clean up app reconciliation state from the last warm start, if it exists
        for warm_app_name in warm_app_names:
            hdel_warm_restart_table("STATE_DB", "WARM_RESTART_TABLE",
                                    warm_app_name, "state")

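    # Swap to the new image: stop and remove the running container, give the
    # freshly loaded ":latest" image a versioned tag, then restart the
    # service so the container starts from the new image.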
    run_command("docker kill %s > /dev/null" % container_name)
    run_command("docker rm %s " % container_name)
    if tag is None:
        # example image: docker-lldp-sv2:latest
        tag = get_docker_tag_name(image_latest)
    run_command("docker tag %s:latest %s:%s" % (image_name, image_name, tag))
    run_command("systemctl restart %s" % container_name)

    # All image IDs under this image name
    image_id_all = get_container_image_id_all(image_name)

    # This is the image ID of the image tagged "latest"
    image_id_latest = get_container_image_id(image_latest)

    for image_id in image_id_all:
        if image_id != image_id_latest:
            # Unless requested, the previous docker image will be preserved
            if not cleanup_image and image_id == image_id_previous:
                continue
            run_command("docker rmi -f %s" % image_id)

    exp_state = "reconciled"
    state = ""
    # Post warm restart specific processing for the swss, bgp and teamd dockers: wait for the reconciled state.
    if warm_configured or warm:
        count = 0
        for warm_app_name in warm_app_names:
            state = ""
            # Wait up to 180 seconds (90 polls, 2 seconds apart) for the reconciled state
            while state != exp_state and count < 90:
                sys.stdout.write("\r  {}: ".format(warm_app_name))
                sys.stdout.write("[%-s" % ('=' * count))
                sys.stdout.flush()
                count += 1
                time.sleep(2)
                state = hget_warm_restart_table("STATE_DB",
                                                "WARM_RESTART_TABLE",
                                                warm_app_name, "state")
                syslog.syslog("%s reached %s state" % (warm_app_name, state))
            sys.stdout.write("]\n\r")
            if state != exp_state:
                click.echo("%s failed to reach %s state" %
                           (warm_app_name, exp_state))
                syslog.syslog(
                    syslog.LOG_ERR,
                    "%s failed to reach %s state" % (warm_app_name, exp_state))
    else:
        exp_state = ""  # this is cold upgrade

    # Restore the previous setting: warm restart was only enabled temporarily for this upgrade
    if not warm_configured and warm:
        if container_name in ("swss", "bgp", "teamd"):
            run_command("config warm_restart disable %s" % container_name)

    if state == exp_state:
        click.echo('Done')
    else:
        click.echo('Failed')
        sys.exit(1)
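The reconciliation wait above is a generic poll-until-state-or-timeout pattern: up to 90 polls, 2 seconds apart, of the WARM_RESTART_TABLE state. A minimal standalone sketch of that pattern, with the STATE_DB lookup replaced by an injected callable so it stays self-contained (wait_for_state and the fake getter below are illustrative stand-ins, not the real hget_warm_restart_table):

import time

def wait_for_state(get_state, expected="reconciled", retries=90, interval=2):
    """Poll get_state() until it returns `expected` or retries run out.

    Returns the last observed state so the caller can compare it with
    `expected`, mirroring the final check in upgrade_docker().
    """
    state = ""
    for _ in range(retries):
        state = get_state()
        if state == expected:
            break
        time.sleep(interval)
    return state

# Illustrative usage with a fake getter that "reconciles" on the third poll.
_calls = {"n": 0}

def _fake_getter():
    _calls["n"] += 1
    return "reconciled" if _calls["n"] >= 3 else "pending"

print(wait_for_state(_fake_getter, interval=0))  # -> reconciled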