Example #1
ToDo:
  - configure server
  - support TLS
  - support token auth method
"""
from json import loads
from typing import Dict

from jinja2 import Template

from ceph.ceph import Ceph, CephNode
from utility.log import Log
from utility.utils import get_cephci_config

LOG = Log(__name__)

AGENT_HCL = """pid_file = "/run/vault-agent-pid"

auto_auth {
  method "AppRole" {
    mount_path = "auth/approle"
    config = {
      role_id_file_path = "/usr/local/etc/vault/.app-role-id"
      secret_id_file_path = "/usr/local/etc/vault/.app-secret-id"
      remove_secret_id_file_after_reading = "false"
    }
  }
}
{%- if data.auth == "token" %}
sink "file" {
Example #2
2. Increase/decrease PG counts; enable, disable, and configure the modules that do this
3. Enable logging to file; set and reset config params and cluster checks
4. Set up email alerts and other cluster operations
More operations to be added as needed.

"""

import datetime
import json
import re
import time

from ceph.ceph_admin import CephAdmin
from utility.log import Log

log = Log(__name__)


class RadosOrchestrator:
    """
    RadosOrchestrator contains methods that perform day-1 and day-2 operations on the cluster.
    Usage: initialize the class with a CephAdmin object to run the various operations.
    """
    def __init__(self, node: CephAdmin):
        """
        Initializes the environment to run rados commands.
        Args:
            node: CephAdmin object
        """
        self.node = node
        self.ceph_cluster = node.cluster
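
The class docstring above says the orchestrator is constructed from a CephAdmin object. A minimal usage sketch, assuming a CephAdmin instance named cephadm is already available from the test framework:

rados_obj = RadosOrchestrator(node=cephadm)  # cephadm: pre-existing CephAdmin object (assumption)
cluster = rados_obj.ceph_cluster             # handle to the underlying Ceph cluster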
Example #3
  --hotfix-repo <repo>              To run sanity on hotfix build
  --ignore-latest-container         Skip getting latest nightly container
  --skip-version-compare            Skip verification that ceph versions change post
                                    upgrade
  -c --custom-config <name>=<value> Add a custom config key/value to ceph_conf_overrides
  --custom-config-file <file>       Add custom config yaml to ceph_conf_overrides
  --xunit-results                   Create xUnit result file for test suite run
                                    [default: false]
  --enable-eus                      Enables EUS rpms on EUS supported distro
                                    [default: false]
  --skip-enabling-rhel-rpms         Skip adding rpms from subscription if using beta
                                    RHEL images for Interop runs
  --skip-sos-report                 Enables collecting sos-report on test suite failures
                                    [default: false]
"""
# Imports below are not shown in the excerpt; they are the ones the code that
# follows relies on (import paths assumed to match the original module).
import logging
import sys

from libcloud.common.types import LibcloudError
from retry import retry

from utility.log import Log

log = Log(__name__)
root = logging.getLogger()
root.setLevel(logging.INFO)

formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")

ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.ERROR)
ch.setFormatter(formatter)
root.addHandler(ch)

test_names = []


@retry(LibcloudError, tries=5, delay=15)
Example #4
            User(Role.get(5), "*****@*****.**", "Administrator", "pwd", ""),
            User(Role.get(3), "*****@*****.**", "Benutzer", "pwd", ""),
            User(Role.get(3), "*****@*****.**", "Test Benutzer", "pwd", ""),
        ]
        for item in items:
            item.create()

    # ---------------------------------------------------------------------------- #
    @staticmethod
    def installSessions():
        Session.get(1)


# Initialise application
# -------------------------------------------------------------------------------- #
Log.level(Log.DEBUG)
Log.information(__name__, "Initialising Flask...")
app = Flask(__name__,
            static_folder="../../static",
            template_folder="../../template")
Bootstrap(app)
app.secret_key = Configuration["secret_key"]
cache.init_app(app,
               config={
                   "CACHE_TYPE": Configuration["cache_type"],
                   "CACHE_DIR": Configuration["cache_path"]
               })
Log.information(__name__, "Connecting to database...")
app.config["SQLALCHEMY_DATABASE_URI"] = Configuration["sql_db_uri"]
db.app = app
db.init_app(app)
Example #5
@app.route('/node/<node_url>', methods=['DELETE'])
def remove_node(node_url):
    if node_url is None or node_url == '':
        response = {'message': 'No node attached.'}
        return jsonify(response), 400
    blockchain.remove_peer_node(node_url)
    response = {
        'message': 'Node removed successfully.',
        'all_nodes': blockchain.get_peer_nodes()
    }
    return jsonify(response), 201


@app.route('/node', methods=['GET'])
def get_node():
    nodes = blockchain.get_peer_nodes()
    response = {'all_nodes': nodes}
    return jsonify(response), 201


if __name__ == '__main__':
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument('-p', '--port', type=int, default=3200)
    args = parser.parse_args()
    port = args.port
    Log.log_message('Server started at port:{}'.format(port), port)
    wallet = Wallet(port)
    blockchain = Blockchain(wallet.public_key, port)
    app.run(host='0.0.0.0', port=port)
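
For reference, a hypothetical client-side exercise of the two routes above, assuming the server runs locally on the default port 3200 and that a peer at localhost:3201 was previously registered (requests is an extra dependency, not part of the original app):

import requests

base_url = "http://localhost:3200"
print(requests.get(f"{base_url}/node").json())                    # list all peer nodes
print(requests.delete(f"{base_url}/node/localhost:3201").json())  # remove one peer node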
Example #6
    parser.add_argument("--loss",
                        default=0,
                        type=int,
                        help="= 0, smooth CE; = 1, focal loss.")
    parser.add_argument("--data_bal",
                        default='equal',
                        type=str,
                        help="Set to 'equal' (default) or 'unequal'.")
    args = parser.parse_args()
    print(args)

    initialize(args, seed=42)

    dataset = Cifar(args)

    log = Log(log_each=10)
    if args.multigpu == 1:
        model = WideResNet(args.depth,
                           args.width_factor,
                           args.dropout,
                           in_channels=3,
                           labels=10)
        model = nn.DataParallel(model).cuda()
    else:
        model = WideResNet(args.depth,
                           args.width_factor,
                           args.dropout,
                           in_channels=3,
                           labels=10).cuda()

    base_optimizer = torch.optim.SGD
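
Note that base_optimizer holds the SGD class itself, not an instance; the training code presumably constructs the actual optimizer later, possibly through a wrapper. A minimal sketch of direct construction, with hyper-parameter values that are assumptions rather than taken from the excerpt:

optimizer = base_optimizer(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)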
Example #7
"""Cephadm orchestration host operations."""
import json
from copy import deepcopy

from ceph.ceph import CephNode
from ceph.utils import get_node_by_id
from utility.log import Log

from .common import config_dict_to_string
from .helper import monitoring_file_existence
from .maintenance import MaintenanceMixin
from .orch import Orch, ResourceNotFoundError

logger = Log(__name__)
DEFAULT_KEYRING_PATH = "/etc/ceph/ceph.client.admin.keyring"
DEFAULT_CEPH_CONF_PATH = "/etc/ceph/ceph.conf"


class HostOpFailure(Exception):
    pass


class Host(MaintenanceMixin, Orch):
    """Interface for executing ceph host <options> operations."""

    SERVICE_NAME = "host"

    def list(self):
        """
        List the cluster hosts
Example #8
File: train.py  Project: brkmnd/DcrParser
def main_worker(gpu, n_gpus_per_node, args):
    is_master = gpu == 0
    directory = initialize(args,
                           create_directory=is_master,
                           init_wandb=args.log_wandb and is_master)

    os.environ["MASTER_ADDR"] = "localhost"
    if "MASTER_PORT" not in os.environ:
        os.environ["MASTER_PORT"] = "12345"

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method="env://",
                                world_size=n_gpus_per_node,
                                rank=gpu)

    dataset = SharedDataset(args)
    dataset.load_datasets(args, gpu, n_gpus_per_node)

    model = Model(dataset, args)
    parameters = [
        {"params": p, "weight_decay": args.encoder_weight_decay}
        for p in model.get_encoder_parameters(args.n_encoder_layers)
    ] + [
        {"params": model.get_decoder_parameters(),
         "weight_decay": args.decoder_weight_decay},
    ]
    optimizer = AdamW(parameters, betas=(0.9, args.beta_2))
    scheduler = multi_scheduler_wrapper(optimizer, args)
    autoclip = AutoClip([
        p for name, p in model.named_parameters() if "loss_weights" not in name
    ])
    if args.balance_loss_weights:
        loss_weight_learner = LossWeightLearner(args, model, n_gpus_per_node)

    if is_master:
        if args.log_wandb:
            import wandb
            wandb.watch(model, log=args.wandb_log_mode)
        print(f"\nmodel: {model}\n")
        log = Log(dataset,
                  model,
                  optimizer,
                  args,
                  directory,
                  log_each=10,
                  log_wandb=args.log_wandb)

    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)

    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[gpu])
        raw_model = model.module
    else:
        raw_model = model

    force_cpu_dev = False  # changed: used together with the related CPU-fallback changes below
    if force_cpu_dev:
        dev0 = torch.device("cpu")
        model.to(dev0)
        gpu = dev0

    for epoch in range(args.epochs):

        #
        # TRAINING
        #

        model.train()
        if is_master:
            log.train(len_dataset=dataset.train_size)

        i = 0
        model.zero_grad()
        losses_over_bs = []  # changed: added to accumulate per-batch losses
        for batch in dataset.train:
            if not force_cpu_dev:  # changed: move the batch to GPU only when not forcing CPU
                batch = Batch.to(batch, gpu)
            total_loss, losses, stats = model(batch)

            for head in raw_model.heads:
                stats.update(head.loss_weights_dict())

            if args.balance_loss_weights:
                loss_weight_learner.compute_grad(losses, epoch)

            losses_over_bs.append(
                total_loss.item())  # changed: record the batch loss for later analysis
            total_loss.backward()

            if (i + 1) % args.accumulation_steps == 0:
                grad_norm = autoclip()

                if args.balance_loss_weights:
                    loss_weight_learner.step(epoch)
                scheduler(epoch)
                optimizer.step()
                model.zero_grad()

                if is_master:
                    with torch.no_grad():
                        batch_size = batch["every_input"][0].size(
                            0) * args.accumulation_steps
                        log(batch_size,
                            stats,
                            args.frameworks,
                            grad_norm=grad_norm,
                            learning_rates=scheduler.lr() +
                            [loss_weight_learner.scheduler.lr()])

            del total_loss, losses

            i += 1

        if not is_master:
            continue

        #
        # VALIDATION CROSS-ENTROPIES
        #
        model.eval()
        log.eval(len_dataset=dataset.val_size)

        with torch.no_grad():
            for batch in dataset.val:
                try:
                    _, _, stats = model(Batch.to(batch, gpu))

                    batch_size = batch["every_input"][0].size(0)
                    log(batch_size, stats, args.frameworks)
                except RuntimeError as e:
                    if 'out of memory' in str(e):
                        print('| WARNING: ran out of memory, skipping batch')
                        if hasattr(torch.cuda, 'empty_cache'):
                            torch.cuda.empty_cache()
                    else:
                        raise e

        lobs = np.array(losses_over_bs)  # changed: used by the summary print below
        # changed: print mean/max/min training loss for the epoch
        print(f"{lobs.mean()}; {lobs.max()}; {lobs.min()}")
        log.flush()

        #
        # VALIDATION MRP-SCORES
        #
        predict(raw_model,
                dataset.val,
                args.validation_data,
                args,
                directory,
                gpu,
                run_evaluation=True,
                epoch=epoch)

    #
    # TEST PREDICTION
    #
    test_fpath = f"{directory}/test_predictions/"  #changed - catch exists error
    if not os.path.exists(test_fpath):
        os.mkdir(test_fpath)

    #os.mkdir(f"{directory}/test_predictions/")
    predict(raw_model, dataset.test, args.test_data, args,
            f"{directory}/test_predictions/", gpu)
Example #9
  --hotfix-repo <repo>              To run sanity on hotfix build
  --ignore-latest-container         Skip getting latest nightly container
  --skip-version-compare            Skip verification that ceph versions change post
                                    upgrade
  -c --custom-config <name>=<value> Add a custom config key/value to ceph_conf_overrides
  --custom-config-file <file>       Add custom config yaml to ceph_conf_overrides
  --xunit-results                   Create xUnit result file for test suite run
                                    [default: false]
  --enable-eus                      Enables EUS rpms on EUS supported distro
                                    [default: false]
  --skip-enabling-rhel-rpms         Skip adding rpms from subscription if using beta
                                    RHEL images for Interop runs
  --skip-sos-report                 Enables collecting sos-report on test suite failures
                                    [default: false]
"""
log = Log()
test_names = []


@retry(LibcloudError, tries=5, delay=15)
def create_nodes(
    conf,
    inventory,
    osp_cred,
    run_id,
    cloud_type="openstack",
    report_portal_session=None,
    instances_name=None,
    enable_eus=False,
    rp_logger: Optional[ReportPortal] = None,
):