Example #1
def _scale_down_scale_up(deployment='ckan', namespace=None, replicas=1):
    logs.info('Scaling ckan replicas')
    kubectl.call(f'scale deployment {deployment} --replicas=0',
                 namespace=namespace)
    kubectl.call(f'scale deployment {deployment} --replicas={replicas}',
                 namespace=namespace)
    time.sleep(20)
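
All of the examples on this page go through a thin kubectl wrapper module from ckan_cloud_operator (imported as in Example #11 below). Its real implementation is not shown here; the following is only a minimal sketch, under the assumption that call() shells out to kubectl and returns the process exit code, which is why callers compare the result against 0.

# Minimal sketch (assumption) of a kubectl.call-style wrapper, not the real
# ckan_cloud_operator implementation: it runs the kubectl subcommand in an
# optional namespace and returns the exit code.
import subprocess

def call(cmd, namespace=None):
    namespace_arg = f'-n {namespace} ' if namespace else ''
    full_cmd = f'kubectl {namespace_arg}{cmd}'
    print(f'Running: {full_cmd}')
    return subprocess.call(full_cmd, shell=True)
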
Example #2
def zk_put_configs(configs_dir):
    pod_name = kubectl.get('pods',
                           '-l',
                           'app=provider-solr-solrcloud-zk',
                           required=True)['items'][0]['metadata']['name']
    for input_filename in glob.glob(f'{configs_dir}/**/*', recursive=True):
        if not os.path.isfile(input_filename): continue
        output_filename = '/configs' + input_filename.replace(configs_dir, '')
        print(f'{input_filename} --> {output_filename}')
        output_filepath = ''
        for output_filepart in output_filename.split('/')[:-1]:
            output_filepart = output_filepart.strip()
            if not output_filepart:
                continue
            output_filepath += f'/{output_filepart}'
            print(f'create {output_filepath} null')
            print(
                kubectl.call(
                    f'exec {pod_name} zkCli.sh create {output_filepath} null'))
        print(f'copy {output_filename}')
        print(kubectl.call(f'cp {input_filename} {pod_name}:/tmp/zk_input'))
        print(f'create {output_filename}')
        print(
            kubectl.call(
                f"exec {pod_name} bash -- -c 'zkCli.sh create {output_filename} \"$(cat /tmp/zk_input)\"'"
            ))
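
Example #2 also relies on kubectl.get, which evidently returns parsed resource data (the result is indexed with ['items'][0]['metadata']['name']). A minimal sketch of such a helper, assuming it wraps "kubectl get ... -o json", might look like this; the real helper presumably also enforces non-empty results when required=True.

# Minimal sketch (assumption) of a kubectl.get-style helper: runs
# `kubectl get ... -o json` and returns the parsed output, or None when
# nothing is found and required=False.
import json
import subprocess

def get(*args, required=True, namespace=None):
    namespace_arg = f'-n {namespace} ' if namespace else ''
    cmd = f'kubectl {namespace_arg}get {" ".join(args)} -o json'
    try:
        return json.loads(subprocess.check_output(cmd, shell=True))
    except subprocess.CalledProcessError:
        if required:
            raise
        return None
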
Example #3
def restart_solr_pods(show_zookeeper, solrcloud_only, force=False):
    pod_name = '--all'
    force = '--force --grace-period=0' if force else ''
    if show_zookeeper:
        kubectl.delete_items_by_labels(['pod'], {'app':'provider-solr-solrcloud-zk'}, 'ckan-cloud')
        return
    if solrcloud_only:
        kubectl.delete_items_by_labels(['pod'], {'app':'provider-solr-solrcloud-sc'}, 'ckan-cloud')
        return
    kubectl.call(f'delete pods {pod_name} {force}', 'ckan-cloud')
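
Example #3 also uses kubectl.delete_items_by_labels, which is not shown on this page. A hypothetical sketch, reusing the call() sketch under Example #1, could build a label selector from the dict and delete all matching resources:

# Hypothetical sketch of a delete_items_by_labels-style helper (assumption,
# not the real implementation): turns the labels dict into a -l selector and
# deletes the matching resources in the given namespace.
def delete_items_by_labels(item_kinds, labels, namespace=None):
    label_selector = ','.join(f'{k}={v}' for k, v in labels.items())
    return call(f'delete {",".join(item_kinds)} -l {label_selector}',
                namespace=namespace)
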
Example #4
def delete(router_name):
    print(f'Deleting traefik router {router_name}')
    if all([
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} deployment') == 0,
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} service') == 0,
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} secret') == 0,
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} configmap') == 0,
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} PersistentVolumeClaim') == 0,
        kubectl.call(f'delete --ignore-not-found -l ckan-cloud/router-name={router_name} CkanCloudRoute') == 0,
        kubectl.call(f'delete --ignore-not-found CkanCloudRouter {router_name}') == 0,
    ]):
        print('Removing finalizers')
        success = True
        routes = kubectl.get_items_by_labels('CkanCloudRoute', {'ckan-cloud/router-name': router_name}, required=False)
        if not routes: routes = []
        for route in routes:
            route_name = route['metadata']['name']
            if kubectl.call(
                    f'patch CkanCloudRoute {route_name} -p \'{{"metadata":{{"finalizers":[]}}}}\' --type=merge',
            ) != 0:
                success = False
        if kubectl.get(f'CkanCloudRouter {router_name}', required=False):
            if kubectl.call(
                    f'patch CkanCloudRouter {router_name} -p \'{{"metadata":{{"finalizers":[]}}}}\' --type=merge',
            ) != 0:
                success = False
        assert success
    else:
        raise Exception('Deletion failed')
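
The end of Example #4 patches metadata.finalizers to an empty list so the custom resources can actually be garbage-collected after deletion. If that pattern were needed elsewhere, it could be factored into a small helper; the sketch below is hypothetical and builds on the call() sketch shown earlier.

import json

# Hypothetical helper (not part of the original example) that clears the
# finalizers of a single resource so Kubernetes can finish deleting it.
def remove_finalizers(kind, name):
    patch = json.dumps({'metadata': {'finalizers': []}})
    return call(f"patch {kind} {name} -p '{patch}' --type=merge") == 0
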
Example #5
def delete(instance_id, instance):
    tiller_namespace_name = _get_resource_name()
    ckan_helm_release_name = f'ckan-cloud-{instance_id}'
    errors = []
    try:
        logs.info(f'Deleting helm release {ckan_helm_release_name}')
        helm_driver.delete(tiller_namespace_name, ckan_helm_release_name)
    except Exception as e:
        logs.warning(traceback.format_exc())
        errors.append(f'Failed to delete helm release')
    if kubectl.call(f'delete --wait=false namespace {instance_id}') != 0:
        errors.append(f'Failed to delete namespace')
    assert len(errors) == 0, ', '.join(errors)
Example #6
def delete(instance_id, instance):
    tiller_namespace_name = _get_tiller_namespace_name(instance_id, instance)
    release_name = _get_helm_release_name(instance_id, instance)
    logs.info(tiller_namespace_name=tiller_namespace_name,
              release_name=release_name)
    errors = []
    try:
        logs.info(f'Deleting helm release {release_name}')
        delete_kwargs = dict(tiller_namespace=tiller_namespace_name,
                             release_name=release_name)
        app_type = instance['spec'].get('app-type')
        if app_type:
            _get_app_type_manager(app_type).pre_delete_hook(
                instance_id, instance, delete_kwargs)
        helm_driver.delete(**delete_kwargs)
        if app_type:
            _get_app_type_manager(app_type).post_delete_hook(
                instance_id, instance, delete_kwargs)
    except Exception as e:
        logs.warning(traceback.format_exc())
        errors.append(f'Failed to delete helm release')
    if kubectl.call(f'delete --wait=false namespace {instance_id}') != 0:
        errors.append(f'Failed to delete namespace')
    assert len(errors) == 0, ', '.join(errors)
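
Example #6 delegates to per-app-type pre/post delete hooks, but the app type managers themselves are not shown on this page. Purely to illustrate the interface implied by those calls, a hypothetical no-op manager could look like the following.

# Hypothetical no-op app type manager; it only illustrates the hook
# signatures implied by Example #6, not the real ckan_cloud_operator managers.
class NoopAppTypeManager:

    def pre_delete_hook(self, instance_id, instance, delete_kwargs):
        pass  # could adjust delete_kwargs before helm_driver.delete runs

    def post_delete_hook(self, instance_id, instance, delete_kwargs):
        pass  # could clean up app-specific resources afterwards
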
Example #7
    def create(cls, *args, **kwargs):
        create_type = args[0]
        instance_id = args[-1]
        from ckan_cloud_operator.providers.db.manager import get_default_db_prefix
        db_prefix = kwargs['db_prefix'] if kwargs.get('db_prefix') else get_default_db_prefix()
        if create_type == 'from-gitlab':
            gitlab_repo = args[1]
            solr_config = args[2]
            db_name = instance_id
            datastore_name = f'{instance_id}-datastore'
            storage_path = kwargs.get('storage_path') or f'/ckan/{instance_id}'
            from_db_backups = kwargs.get('from_db_backups')
            logs.info(f'Creating Deis CKAN instance {instance_id}', gitlab_repo=gitlab_repo, solr_config=solr_config,
                      db_name=db_name, datastore_name=datastore_name, storage_path=storage_path,
                      from_db_backups=from_db_backups)

            if kwargs.get('use_private_gitlab_repo'):
                deploy_token_server = input('Gitlab registry url [default: registry.gitlab.com]: ') or 'registry.gitlab.com'
                deploy_token_username = input('Gitlab deploy token username: ')
                deploy_token_password = input('Gitlab deploy token password: ')
                kubectl.call('delete secret private-gitlab-registry', namespace=instance_id)
                kubectl.call(f'create secret docker-registry private-gitlab-registry --docker-server={deploy_token_server} --docker-username={deploy_token_username} --docker-password={deploy_token_password}', namespace=instance_id)

            if from_db_backups:
                db_import_url, datastore_import_url = from_db_backups.split(',')
                migration_name = None
                success = False
                for event in ckan_db_migration_manager.migrate_deis_dbs(None, db_name, datastore_name,
                                                                        db_import_url=db_import_url,
                                                                        datastore_import_url=datastore_import_url,
                                                                        rerun=kwargs.get('rerun'),
                                                                        force=kwargs.get('force'),
                                                                        recreate_dbs=kwargs.get('recreate_dbs'),
                                                                        db_prefix=db_prefix):
                    migration_name = ckan_db_migration_manager.get_event_migration_created_name(event) or migration_name
                    success = ckan_db_migration_manager.print_event_exit_on_complete(
                        event,
                        f'DBs import {from_db_backups} -> {db_name}, {datastore_name}',
                        soft_exit=True
                    )
                    if success is not None:
                        break
                assert success, f'Invalid DB migration success value ({success})'
            else:
                migration_name = None
            spec = {
                'ckanPodSpec': {},
                'ckanContainerSpec': {'imageFromGitlab': gitlab_repo},
                'envvars': {'fromGitlab': gitlab_repo},
                'solrCloudCollection': {
                    'name': kwargs.get('solr_collection') or instance_id,
                    'configName': solr_config
                },
                'db': {
                    'name': db_name,
                    **({'fromDbMigration': migration_name} if migration_name else {}),
                    **({'dbPrefix': db_prefix} if db_prefix else {})
                },
                'datastore': {
                    'name': datastore_name,
                    **({'fromDbMigration': migration_name} if migration_name else {}),
                    **({'dbPrefix': db_prefix} if db_prefix else {})
                },
                'storage': {
                    'path': storage_path,
                }
            }
            if kwargs.get('use_private_gitlab_repo'):
                spec['ckanContainerSpec']['imagePullSecrets'] = [{'name': 'private-gitlab-registry'}]
        elif create_type == 'from-gcloud-envvars':
            print(f'Creating Deis CKAN instance {instance_id} from gcloud envvars import')
            instance_env_yaml, image, solr_config, storage_path, instance_id = args[1:]
            db_migration_name = kwargs.get('db_migration_name')
            assert db_migration_name, 'creating from gcloud envvars without a db migration is not supported yet'
            if type(instance_env_yaml) == str:
                logs.info(f'Creating {instance_id}-envvars secret from file: {instance_env_yaml}')
                subprocess.check_call(
                    f'kubectl -n ckan-cloud create secret generic {instance_id}-envvars --from-file=envvars.yaml={instance_env_yaml}',
                    shell=True
                )
            else:
                logs.info(f'Creating {instance_id}-envvars secret from inline string')
                kubectl.update_secret(f'{instance_id}-envvars', {'envvars.yaml': yaml.dump(instance_env_yaml, default_flow_style=False)})
            spec = {
                'ckanPodSpec': {},
                'ckanContainerSpec': {'image': image},
                'envvars': {'fromSecret': f'{instance_id}-envvars'},
                'solrCloudCollection': {
                    'name': instance_id,
                    'configName': solr_config
                },
                'db': {
                    'name': instance_id,
                    'fromDbMigration':db_migration_name,
                    **({'dbPrefix': db_prefix} if db_prefix else {})
                },
                'datastore': {
                    'name': f'{instance_id}-datastore',
                    'fromDbMigration': db_migration_name,
                    **({'dbPrefix': db_prefix} if db_prefix else {})
                },
                'storage': {
                    'path': storage_path
                }
            }
        else:
            raise NotImplementedError(f'invalid create type: {create_type}')
        instance_kind = ckan_manager.instance_kind()
        instance = {
            'apiVersion': f'stable.viderum.com/v1',
            'kind': instance_kind,
            'metadata': {
                'name': instance_id,
                'namespace': 'ckan-cloud',
                'finalizers': ['finalizer.stable.viderum.com']
            },
            'spec': spec
        }
        subprocess.run('kubectl apply -f -', input=yaml.dump(instance).encode(), shell=True, check=True)
        return cls(instance_id, values=instance)
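
Example #7 uses kubectl.update_secret for the inline-envvars case and finishes by piping YAML into "kubectl apply -f -". The wrapper is not shown here; the sketch below is an assumption that it follows the same apply-from-stdin pattern, writing the given values into a Secret's stringData.

import subprocess
import yaml

# Minimal sketch (assumption) of an update_secret-style helper: applies a
# Secret manifest via `kubectl apply -f -`, so re-running it updates the
# secret in place.
def update_secret(name, values, namespace='ckan-cloud'):
    secret = {
        'apiVersion': 'v1',
        'kind': 'Secret',
        'metadata': {'name': name, 'namespace': namespace},
        'stringData': values,
    }
    subprocess.run('kubectl apply -f -', input=yaml.dump(secret).encode(),
                   shell=True, check=True)
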
Example #8
def _wait_instance_events(instance_id):
    import logging
    start_time = datetime.datetime.now()
    last_message = 0
    logs.info('Waiting for instance events', start_time=start_time)
    missing_events = None
    while True:
        time.sleep(15)
        currently_missing, errors, ckan_logs = _check_instance_events(
            instance_id)
        if len(currently_missing) == 0:
            logs.info('All instance events completed successfully')
            break
        if currently_missing != missing_events:
            missing_events = currently_missing
            logs.info('Still waiting for', repr(sorted(missing_events)))
            start_time = datetime.datetime.now()
        time_passed = (datetime.datetime.now() - start_time).total_seconds()
        if time_passed - last_message >= 60:
            logs.info('%d seconds since started waiting' % time_passed)
            last_message += 60
        if time_passed > int(os.environ.get('CCO_WAIT_TIMEOUT', 500)):
            failed_pods = [
                item for item in kubectl.get(f'pods -n {instance_id}').get(
                    'items', []) if not all(
                        stat.get('ready')
                        for stat in item['status']['containerStatuses'])
            ]
            logs.info('*** SOMETHING WENT WRONG!!! ***')
            logs.info(100 * '#')
            logs.info(100 * '#')
            if not len(failed_pods):
                logs.info('But we could not get failing containers')
                logs.info(
                    'You may try increasing default wait timeout by setting CCO_WAIT_TIMEOUT environment variable [default: 500]'
                )
                ckan_pod_name = [
                    item['metadata']['name']
                    for item in kubectl.get(f'pods -n {instance_id}').get(
                        'items', []) if item.get('metadata', {}).get(
                            'labels', {}).get('app') == 'ckan'
                ][0]
                _log_container_error('CONTAINER LOGS', ckan_pod_name, 'ckan')
                kubectl.call(f'logs {ckan_pod_name}', namespace=instance_id)

            logs.info('Number of Failed Pods: %s' % len(failed_pods))
            for pod_meta in failed_pods:
                init_containers = pod_meta['status'].get(
                    'initContainerStatuses')
                pod_name = pod_meta['metadata']['name']
                if init_containers is not None:
                    logs.info('Checking Init Containers in %s' % pod_name)
                    for i, init_container in enumerate(init_containers):
                        if not init_container.get('ready'):
                            container_name = pod_meta['spec'][
                                'initContainers'][i]['name']
                            _log_container_error('INIT CONTAINER LOGS',
                                                 pod_name, container_name)
                            kubectl.call(
                                f'logs {pod_name} -c {container_name}',
                                namespace=instance_id)
                        else:
                            logs.info('Init Containers are fine in %s' %
                                      pod_name)
                logs.info('Checking Containers in %s' % pod_name)
                container_stats = pod_meta['status'].get('containerStatuses')
                no_log_statuses = ['PodInitializing']
                pod_status = [
                    stat.get('state', {}).get('waiting', {}).get('reason')
                    in no_log_statuses for stat in container_stats
                ]
                if all(pod_status):
                    logs.info('Pod %s looks good describing:' % pod_name)
                    _log_container_error('KUBECTL DESCRIBE POD', pod_name)
                    kubectl.call(f'describe pod {pod_name}',
                                 namespace=instance_id)
                else:
                    logs.info('Pod %s did not start:' % pod_name)
                    for i, container in enumerate(container_stats):
                        container_name = pod_meta['spec']['containers'][i][
                            'name']
                        _log_container_error('CONTAINER LOGS', pod_name,
                                             container_name)
                        kubectl.call(f'logs {pod_name} -c {container_name}',
                                     namespace=instance_id)

            logs.info(100 * '#')
            logs.info(100 * '#')
            raise Exception('timed out waiting for instance events')
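
The _log_container_error helper called throughout this example is not shown on this page; judging from the surrounding output it only prints a banner before the relevant kubectl output. A hypothetical stand-in, assuming the same logs module, might be:

# Hypothetical stand-in for _log_container_error (the real helper is not
# shown in these examples): print a banner identifying the pod/container
# whose logs or description follow.
def _log_container_error(title, pod_name, container_name=None):
    suffix = f' container={container_name}' if container_name else ''
    logs.info(100 * '#')
    logs.info(f'*** {title}: pod={pod_name}{suffix} ***')
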
Example #9
def main(instance_id, old_db_prefix, new_db_prefix, down_time_approval_code,
         new_instance_id, gitlab_repo, solr_collection_name, storage_path):
    logs.info(instance_id=instance_id,
              old_db_prefix=old_db_prefix,
              new_db_prefix=new_db_prefix,
              down_time_approval_code=down_time_approval_code)
    instance = kubectl.get('ckancloudckaninstance', instance_id)
    current_db_prefix = instance['spec']['db'].get('dbPrefix', 'prod')
    current_datastore_prefix = instance['spec']['datastore'].get(
        'dbPrefix', 'prod')
    assert current_db_prefix == current_datastore_prefix, 'different prefix for datastore and DB is not supported yet'
    db_name = instance['spec']['db']['name']
    datastore_name = instance['spec']['datastore']['name']
    logs.info(current_db_prefix=current_db_prefix,
              db_name=db_name,
              current_datastore_prefix=current_datastore_prefix,
              datastore_name=datastore_name)
    if current_db_prefix == old_db_prefix:
        if down_time_approval_code:
            _check_down_time_approval_code(instance_id, old_db_prefix,
                                           new_db_prefix,
                                           down_time_approval_code, db_name,
                                           datastore_name)
            logs.info(
                f'Deleting instance deployment (namespace={instance_id} deployment={instance_id})'
            )
            kubectl.call(f'delete deployment {instance_id} --wait=false',
                         namespace=instance_id)
            logs.info('Creating DB backups')
            assert gcloudsql_manager.create_backup(db_name), 'failed db backup'
            assert gcloudsql_manager.create_backup(
                datastore_name), 'failed datastore backup'
            db_backup_url, datastore_backup_url = _get_latest_backups(
                db_name, datastore_name)
            logs.important_log(logs.INFO, db_backup_url=db_backup_url)
            logs.important_log(logs.INFO,
                               datastore_backup_url=datastore_backup_url)
            logs.info(
                'Creating parameters file to trigger copy instance job to create the new instance'
            )
            with open('copy_instance_params', 'w') as f:
                f.write(f'OLD_INSTANCE_ID={instance_id}\n'
                        f'NEW_INSTANCE_ID={new_instance_id}\n'
                        f'NEW_GITLAB_REPO={gitlab_repo}\n'
                        f'NEW_DB_PREFIX={new_db_prefix}\n'
                        f'IMPORT_DATE_PATH=\n'
                        f'IMPORT_HOUR=\n'
                        f'SKIP_MINIO_MIRROR=no\n'
                        f'SKIP_CREATE=no\n'
                        f'SKIP_ROUTER=no\n'
                        f'USE_EXISTING_MIGRATION=no\n'
                        f'NEW_SOLR_COLLECTION_NAME={solr_collection_name}\n'
                        f'NEW_STORAGE_PATH={storage_path}\n'
                        f'DATABASE_IMPORT_URL={db_backup_url}\n'
                        f'DATASTORE_IMPORT_URL={datastore_backup_url}\n'
                        f'DRY_RUN=yes\n')
            with open('change_instance_routes_params', 'w') as f:
                f.write(f'OLD_INSTANCE_ID={instance_id}\n'
                        f'NEW_INSTANCE_ID={new_instance_id}\n'
                        f'DRY_RUN=yes\n')
            logs.exit_great_success()
        else:
            down_time_approval_code = _create_down_time_approval_code(
                instance_id, old_db_prefix, new_db_prefix, db_name,
                datastore_name)
            logs.important_log(
                logs.INFO,
                f'DOWN_TIME_APPROVAL_CODE={down_time_approval_code}')
            logs.exit_great_success(quiet=True)
    else:
        raise NotImplementedError()
Example #10
def set_image(instance_id, image_name, service='ckan', container_name=None):
    cont_name = container_name or service
    kubectl.call(
        f'set image deployment/{service} {cont_name}={image_name}',
        namespace=instance_id)
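
kubectl set image only patches the deployment's pod template; it does not wait for the new pods to become ready. If blocking behavior were needed, a hypothetical follow-up using the standard "kubectl rollout status" command could be added after set_image():

# Hypothetical follow-up to set_image() (not in the original example): block
# until the deployment has rolled out, or fail if the rollout does not finish.
def wait_for_rollout(instance_id, service='ckan'):
    assert kubectl.call(f'rollout status deployment/{service}',
                        namespace=instance_id) == 0, 'rollout failed'
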
Example #11
def kubectl_command(arg):
    from ckan_cloud_operator import kubectl
    exit(kubectl.call(' '.join(arg)))
Example #12
def get_solr_pods(format=''):
    kubectl.call(f'get pods', 'ckan-cloud')
Example #13
def _scale_down_scale_up(deployment='router-traefik-instances-default',
                         replicas=1):
    kubectl.call(f'scale deployment {deployment} --replicas=0')
    kubectl.call(f'scale deployment {deployment} --replicas={replicas}')