예제 #1
0
    def setUp(self):
        """
        Prepare the fixture: get a token for the anonymous user and upload
        a small test file through the registration endpoint.
        """

        # Run the parent fixture first
        super().setUp()

        log.info("\n### Creating a test token (for ANONYMOUS IRODS user) ###")
        anonymous = self._anonymous_user
        login_payload = json.dumps({'username': anonymous})
        auth_url = self._auth_uri + self._auth_endpoint

        log.debug('*** Testing anonymous authentication on %s' % auth_url)
        login_response = self.app.post(auth_url, data=login_payload)
        self.assertEqual(
            login_response.status_code, self._hcodes.HTTP_OK_BASIC)
        login_content = self.get_content(login_response)
        self.save_token(login_content.get('token'), suffix=anonymous)

        # Build the iRODS paths used by the tests from the service variables
        self.irods_vars = detector.services_classes.get('irods').variables
        self._filename = 'some_file.txt'
        home = 'home'
        guest_file = path.join(
            path.root(),
            self.irods_vars.get('zone'),
            home,
            self.irods_vars.get('guest_user'),
            self._filename,
        )
        self._ipath = str(guest_file)
        missing_home = path.join(
            path.root(),
            self.irods_vars.get('zone'),
            home,
            'nonexisting',
        )
        self._no_permission_path = str(missing_home)
        log.debug('*** Upload a test file: %s' % self._ipath)

        # Upload entity in test folder
        upload_url = self._api_uri + self._register_endpoint + self._ipath
        upload_data = {'file': (io.BytesIO(b"just a test"), self._filename)}
        upload_response = self.app.put(
            upload_url,
            data=upload_data,
            headers=self.__class__.auth_header,
        )
        self.assertEqual(
            upload_response.status_code, self._hcodes.HTTP_OK_BASIC)
예제 #2
0
    def publish_helper(self, icom, ipath, check_only=True, unpublish=False):
        """
        Check, publish or unpublish every relevant step of an iRODS path.

        The path is walked from its first component down to the leaf. Steps
        that are a single character long (the root dir), equal to the
        current zone, or equal to ``'home'`` are skipped.

        Args:
            icom: iRODS client; must provide ``get_current_zone()`` and is
                forwarded to ``single_path_check`` / ``single_permission``.
            ipath: the iRODS path to walk.
            check_only: when true, only verify the publication status and
                never change any permission.
            unpublish: when ``check_only`` is false, pass ``permission=None``
                to ``single_permission`` instead of ``permission='read'``.

        Returns:
            ``False`` if ``check_only`` is set and at least one step fails
            ``single_path_check``; ``True`` otherwise.
        """

        from utilities import path
        current_zone = icom.get_current_zone()
        current = ''

        for ipath_step in path.parts(ipath):

            current = path.join(current, ipath_step, return_str=True)

            # to skip: root dir, zone and home
            if len(ipath_step) == 1 \
               or ipath_step == current_zone or ipath_step == 'home':
                continue

            # find out if already published
            check = self.single_path_check(icom, current_zone, str(current))

            if check_only:
                # Read-only mode: a check must never modify permissions,
                # even when the step is already published.
                if not check:
                    return False
                continue

            # otherwise you want to publish/unpublish this path
            permission = None if unpublish else 'read'
            self.single_permission(icom, current, permission=permission)

        return True
예제 #3
0
    def get_download(self,
                     imain,
                     order_id,
                     order_path,
                     files,
                     restricted=False,
                     index=None):
        """
        Build the download descriptor for the zip file of an order.

        Returns ``None`` when the expected zip name is not a key of
        ``files``. Otherwise a ticket code is obtained for the zip, the
        download URL and the code are stored as metadata on the iRODS
        object, and a dict with ``name``, ``url`` and ``size`` is returned.
        """

        archive_name = get_order_zip_file_name(order_id, restricted, index)
        if archive_name not in files:
            return None

        archive_ipath = path.join(order_path, archive_name, return_str=True)
        log.debug("Zip irods path: %s", archive_ipath)

        ticket = self.no_slash_ticket(imain, archive_ipath)

        # Two-part file type: restricted flag followed by the index
        # ("0" when no index is given)
        restricted_flag = "1" if restricted else "0"
        index_flag = "0" if index is None else str(index)
        ftype = restricted_flag + index_flag

        url_parts = (CURRENT_HTTPAPI_SERVER, API_URL, ORDERS_ENDPOINT,
                     order_id, ftype, ticket)
        download_url = '%s%s/%s/%s/download/%s/c/%s' % url_parts

        # Drop any previous metadata before writing the new values
        # FIXME: verify if iticket_code is set and then invalidate it
        for stale_key in ('iticket_code', 'download'):
            imain.remove_metadata(archive_ipath, stale_key)

        # Store the url as metadata on the irods file
        imain.set_metadata(archive_ipath, download=download_url)

        # TOFIX: we should add a database or cache to save this,
        # not irods metadata (known for low performances)
        imain.set_metadata(archive_ipath, iticket_code=ticket)

        details = files[archive_name]
        return {
            'name': archive_name,
            'url': download_url,
            'size': details.get('content_length', 0),
        }
예제 #4
0
    def delete(self):
        """
        Submit the asynchronous ``delete_orders`` task and return its id.

        The task receives the iRODS order path, the local orders directory
        and the request input.
        """

        payload = self.get_input()

        irods = self.get_main_irods_connection()
        remote_orders = self.get_irods_order_path(irods)
        local_orders = str(path.join(MOUNTPOINT, ORDERS_DIR))
        log.debug("Order collection: %s", remote_orders)
        log.debug("Order path: %s", local_orders)

        task_args = [remote_orders, local_orders, payload]
        job = CeleryExt.delete_orders.apply_async(args=task_args)
        log.info("Async job: %s", job.id)
        return self.return_async_id(job.id)
예제 #5
0
    def get(self, batch_id):
        """
        Report the status and the files of an ingestion batch.

        Returns an error response (via ``send_errors``) when the batch is
        missing or misconfigured. Otherwise returns a dict with ``batch``,
        ``status`` (set only for the statuses handled below) and ``files``.
        """

        log.info("Batch request: %s", batch_id)

        # get irods session
        irods = self.get_main_irods_connection()

        remote_batch = self.get_irods_batch_path(irods, batch_id)
        local_batch = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
        log.info("Batch irods path: %s", remote_batch)
        log.info("Batch local path: %s", local_batch)

        status, listing = self.get_batch_status(
            irods, remote_batch, local_batch)

        if status == MISSING_BATCH:
            not_found = (
                "Batch '%s' not enabled or you have no permissions" % batch_id)
            return self.send_errors(not_found, code=hcodes.HTTP_BAD_NOTFOUND)

        if status == BATCH_MISCONFIGURATION:
            log.error('Misconfiguration: %s files in %s (expected 1)',
                      len(listing), remote_batch)
            misconfigured = "Misconfiguration for batch_id %s" % batch_id
            return self.send_errors(misconfigured,
                                    code=hcodes.HTTP_BAD_RESOURCE)

        result = {'batch': batch_id}

        # Map the internal status constant to its public label; statuses
        # not listed here leave the 'status' key unset.
        status_labels = (
            (NOT_FILLED_BATCH, 'not_filled'),
            (ENABLED_BATCH, 'enabled'),
            (PARTIALLY_ENABLED_BATCH, 'partially_enabled'),
        )
        for known_status, label in status_labels:
            if status == known_status:
                result['status'] = label
                break

        result['files'] = listing
        return result
예제 #6
0
    def delete(self):
        """
        Submit the asynchronous ``delete_batches`` task on the
        ``ingestion`` queue and return its id.

        The task receives the iRODS batch path, the local ingestion
        directory and the request input.
        """

        payload = self.get_input()

        irods = self.get_main_irods_connection()
        remote_batches = self.get_irods_batch_path(irods)
        local_batches = str(path.join(MOUNTPOINT, INGESTION_DIR))
        log.debug("Batch collection: %s", remote_batches)
        log.debug("Batch path: %s", local_batches)

        task_args = [remote_batches, local_batches, payload]
        job = CeleryExt.delete_batches.apply_async(
            args=task_args,
            queue='ingestion',
            routing_key='ingestion',
        )
        log.info("Async job: %s", job.id)
        return self.return_async_id(job.id)
예제 #7
0
    def put(self, batch_id, qc_name):
        """
        Launch a quality check inside a container.

        The batch must exist, be filled and not be misconfigured. The JSON
        input is validated, written to a per-batch folder under the
        ingestion directory, and a container is started through Rancher
        with the batch folder and the JSON folder mounted as volumes.

        Args:
            batch_id: identifier of the ingestion batch to check.
            qc_name: name of the quality check; used to build the container
                name and to select the container image.

        Returns:
            The response dict (``batch_id``, ``qc_name``, ``status``,
            ``input``) when the container was started; otherwise an error
            response built by ``send_errors`` or ``force_response``.
        """

        ###########################
        # get name from batch
        imain = self.get_main_irods_connection()
        batch_path = self.get_irods_batch_path(imain, batch_id)
        local_path = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
        log.info("Batch irods path: %s", batch_path)
        log.info("Batch local path: %s", local_path)
        batch_status, batch_files = self.get_batch_status(
            imain, batch_path, local_path)

        if batch_status == MISSING_BATCH:
            return self.send_errors(
                "Batch '%s' not found (or no permissions)" % batch_id,
                code=hcodes.HTTP_BAD_NOTFOUND)

        if batch_status == NOT_FILLED_BATCH:
            return self.send_errors("Batch '%s' not yet filled" % batch_id,
                                    code=hcodes.HTTP_BAD_RESOURCE)

        if batch_status == BATCH_MISCONFIGURATION:
            log.error('Misconfiguration: %s files in %s (expected 1)',
                      len(batch_files), batch_path)
            return self.send_errors("Misconfiguration for batch_id %s" %
                                    batch_id,
                                    code=hcodes.HTTP_BAD_RESOURCE)

        ###################
        # Parameters (and checks)
        envs = {}
        input_json = self.get_input()

        # TODO: backdoor check - remove me
        # NOTE(review): a client-supplied flag selects the image prefix and
        # overrides the container command (see below); this is driven by
        # untrusted input and should not reach production.
        bd = input_json.pop('eudat_backdoor', False)
        if bd:
            im_prefix = 'eudat'
        else:
            im_prefix = 'maris'
        log.debug("Image prefix: %s", im_prefix)

        # Keys that must be present (and not null) in the JSON input.
        # The "parameters" key is deliberately not required.
        required_keys = [
            "request_id", "edmo_code", "datetime", "api_function", "version",
            "test_mode"
        ]
        for key in required_keys:
            if input_json.get(key) is None:
                return self.send_errors('Missing JSON key: %s' % key,
                                        code=hcodes.HTTP_BAD_REQUEST)

        response = {
            'batch_id': batch_id,
            'qc_name': qc_name,
            'status': 'executed',
            'input': input_json,
        }

        ###################
        try:
            rancher = self.get_or_create_handle()
        except Exception as e:
            # Exception (not BaseException): KeyboardInterrupt / SystemExit
            # must propagate instead of being reported as a server error
            log.critical(str(e))
            return self.send_errors(
                'Cannot establish a connection with Rancher',
                code=hcodes.HTTP_SERVER_ERROR)

        container_name = self.get_container_name(batch_id, qc_name,
                                                 rancher._qclabel)

        # Duplicated quality checks on the same batch are not allowed
        container_obj = rancher.get_container_object(container_name)
        if container_obj is not None:
            log.error("Docker container %s already exists!", container_name)
            response['status'] = 'existing'
            code = hcodes.HTTP_BAD_CONFLICT
            return self.force_response(response,
                                       errors=[response['status']],
                                       code=code)

        docker_image_name = self.get_container_image(qc_name, prefix=im_prefix)

        ###########################
        # ## ENVS

        host_ingestion_path = self.get_ingestion_path_on_host(batch_id)
        container_ingestion_path = self.get_ingestion_path_in_container()

        envs['BATCH_DIR_PATH'] = container_ingestion_path
        from seadata.apis.commons.queue import QUEUE_VARS
        from seadata.apis.commons.cluster import CONTAINERS_VARS
        # Forward the queue settings as LOGS_* variables, replacing the
        # credentials with the ones configured for containers
        for key, value in QUEUE_VARS.items():
            if key == 'enable':
                continue
            elif key == 'user':
                value = CONTAINERS_VARS.get('rabbituser')
            elif key == 'password':
                value = CONTAINERS_VARS.get('rabbitpass')
            envs['LOGS_' + key.upper()] = value
        # Currently not passed to the container:
        # envs['DB_USERNAME'] = CONTAINERS_VARS.get('dbuser')
        # envs['DB_PASSWORD'] = CONTAINERS_VARS.get('dbpass')
        # envs['DB_USERNAME_EDIT'] = CONTAINERS_VARS.get('dbextrauser')
        # envs['DB_PASSWORD_EDIT'] = CONTAINERS_VARS.get('dbextrapass')

        # FOLDER inside /batches to store temporary json inputs
        # TODO: to be put into the configuration
        JSON_DIR = 'json_inputs'

        # Mount point of the json dir into the QC container
        QC_MOUNTPOINT = '/json'

        json_root_backend = os.path.join(MOUNTPOINT, INGESTION_DIR, JSON_DIR)
        json_path_backend = os.path.join(json_root_backend, batch_id)

        # Create the json folder and then the per-batch subfolder
        for folder in (json_root_backend, json_path_backend):
            if os.path.exists(folder):
                continue
            log.info("Creating folder %s", folder)
            try:
                os.mkdir(folder)
            except FileExistsError:
                # A concurrent request created the folder between the
                # existence check and the mkdir: nothing left to do
                pass

        json_input_file = "input.%s.json" % int(time.time())
        json_input_path = os.path.join(json_path_backend, json_input_file)
        with open(json_input_path, "w+") as f:
            f.write(json.dumps(input_json))

        # Same per-batch json folder, as seen from the host, to be mounted
        # into the container at QC_MOUNTPOINT
        json_path_qc = self.get_ingestion_path_on_host(JSON_DIR)
        json_path_qc = os.path.join(json_path_qc, batch_id)
        envs['JSON_FILE'] = os.path.join(QC_MOUNTPOINT, json_input_file)

        extra_params = {
            'dataVolumes': [
                "%s:%s" % (host_ingestion_path, container_ingestion_path),
                "%s:%s" % (json_path_qc, QC_MOUNTPOINT)
            ],
            'environment':
            envs
        }
        if bd:
            extra_params['command'] = ['/bin/sleep', '999999']

        ###########################
        errors = rancher.run(container_name=container_name,
                             image_name=docker_image_name,
                             private=True,
                             extras=extra_params)

        if errors is not None:
            if isinstance(errors, dict):
                edict = errors.get('error', {})

                # This case should never happen, since already verified before
                if edict.get('code') == 'NotUnique':
                    response['status'] = 'existing'
                    code = hcodes.HTTP_BAD_CONFLICT
                else:
                    response['status'] = 'could NOT be started'
                    response['description'] = edict
                    code = hcodes.HTTP_SERVER_ERROR
            else:
                response['status'] = 'failure'
                code = hcodes.HTTP_SERVER_ERROR
            return self.force_response(response,
                                       errors=[response['status']],
                                       code=code)

        return response
예제 #8
0
    def get(self, order_id, ftype, code):
        """
        Stream the zip file of an order to a non-authenticated client.

        The request is accepted only if the zip exists as an iRODS data
        object, the URL-encoded ``code`` matches the ``iticket_code``
        metadata stored on it, and the ticket passes ``test_ticket``.
        Any failure returns an error response via ``send_errors``.
        """
        log.info("Order request: %s (code '%s')", order_id, code)
        payload = {'order_id': order_id, 'code': code}
        start_msg = prepare_message(
            self, json=payload, user='******', log_string='start')
        log_into_queue(self, start_msg)

        log.info("DOWNLOAD DEBUG 1: %s (code '%s')", order_id, code)

        ##################
        irods = self.get_main_irods_connection()
        log.info("DOWNLOAD DEBUG 2: %s (code '%s')", order_id, code)
        order_path = self.get_irods_order_path(irods, order_id)
        log.info(
            "DOWNLOAD DEBUG 3: %s (code '%s') - %s",
            order_id, code, order_path)

        archive_name = self.get_filename_from_type(order_id, ftype)
        if archive_name is None:
            return self.send_errors("Invalid file type %s" % ftype)

        archive_ipath = path.join(order_path, archive_name, return_str=True)

        # Same generic message for "missing file" and "wrong code"
        not_found = "Order '%s' not found (or no permissions)" % order_id

        log.debug("Checking zip irods path: %s", archive_ipath)
        log.info(
            "DOWNLOAD DEBUG 4: %s (code '%s') - %s",
            order_id, code, archive_ipath)
        if not irods.is_dataobject(archive_ipath):
            log.error("File not found %s", archive_ipath)
            return self.send_errors(
                {order_id: not_found}, code=hcodes.HTTP_BAD_NOTFOUND)

        log.info("DOWNLOAD DEBUG 5: %s (code '%s')", order_id, code)

        # TOFIX: we should use a database or cache to save this,
        # not irods metadata (known for low performances)
        metadata, _unused = irods.get_metadata(archive_ipath)
        log.info("DOWNLOAD DEBUG 6: %s (code '%s')", order_id, code)
        stored_code = metadata.get('iticket_code')
        log.info("DOWNLOAD DEBUG 7: %s (code '%s')", order_id, code)

        # The stored code is compared against the URL-encoded request code
        quoted_code = urllib.parse.quote_plus(code)
        log.info("DOWNLOAD DEBUG 8: %s (code '%s')", order_id, code)

        if stored_code != quoted_code:
            log.error("iticket code does not match %s", archive_ipath)
            return self.send_errors(
                {order_id: not_found}, code=hcodes.HTTP_BAD_NOTFOUND)

        # NOTE: very important!
        # use anonymous to get the session here
        # because the ticket supply breaks the iuser session permissions
        ticket_session = self.get_service_instance(
            service_name='irods',
            user='******',
            password='******',
            authscheme='credentials',
        )
        log.info("DOWNLOAD DEBUG 9: %s (code '%s')", order_id, code)
        ticket_session.ticket_supply(code)

        log.info("DOWNLOAD DEBUG 10: %s (code '%s')", order_id, code)
        if not ticket_session.test_ticket(archive_ipath):
            log.error("Invalid iticket code %s", archive_ipath)
            return self.send_errors(
                {order_id: "Invalid code"}, code=hcodes.HTTP_BAD_NOTFOUND)

        # Possible future restrictions on the ticket:
        # iticket mod "$TICKET" add user anonymous
        # iticket mod "$TICKET" uses 1
        # iticket mod "$TICKET" expire "2018-03-23.06:50:00"

        response_headers = {
            'Content-Transfer-Encoding': 'binary',
            'Content-Disposition': "attachment; filename=%s" % archive_name,
        }
        end_msg = prepare_message(
            self, json=payload, log_string='end', status='sent')
        log_into_queue(self, end_msg)
        log.info("DOWNLOAD DEBUG 11: %s (code '%s')", order_id, code)
        return ticket_session.stream_ticket(
            archive_ipath, headers=response_headers)
예제 #9
0
    def post(self):
        """
        Create an unrestricted order.

        Validates the JSON input, makes sure the iRODS collection of the
        order exists and, when PIDs are provided, submits the asynchronous
        ``unrestricted_order`` task.

        Returns:
            - an error response (``HTTP_BAD_REQUEST``) when ``parameters``
              or ``order_number`` is missing;
            - the input JSON with ``parameters`` replaced by
              ``{'status': 'exists'}`` when the zip file already exists;
            - the async task id when PIDs were provided;
            - ``{'status': 'enabled'}`` when there are no PIDs.
        """

        ##################
        log.debug('POST request on orders')
        json_input = self.get_input()
        msg = prepare_message(self, json=json_input, log_string='start')
        log_into_queue(self, msg)

        ##################
        main_key = 'parameters'
        params = json_input.get(main_key)
        # A null or non-object value is handled like a missing key,
        # instead of failing later with TypeError / AttributeError
        if not isinstance(params, dict) or not params:
            error = "'%s' missing" % main_key
            return self.send_errors(error, code=hcodes.HTTP_BAD_REQUEST)

        ##################
        key = 'order_number'
        order_id = params.get(key)
        if order_id is None:
            error = "Order ID '%s': missing" % key
            return self.send_errors(error, code=hcodes.HTTP_BAD_REQUEST)
        order_id = str(order_id)

        ##################
        # Get filename from json input. But it has to follow a
        # specific pattern, so we ignore client input if it does not...
        filename = "order_%s_unrestricted" % order_id
        key = 'file_name'
        if key in params and params[key] != filename:
            log.warning('Client provided wrong filename (%s), will use: %s',
                        params[key], filename)
        # params is the same object stored in json_input, so the task
        # below receives the corrected file name
        params[key] = filename

        ##################
        # PIDS: can be empty if restricted (a null value counts as empty)
        pids = params.get('pids') or []

        ##################
        # Create the path
        log.info("Order request: %s", order_id)
        imain = self.get_main_irods_connection()
        order_path = self.get_irods_order_path(imain, order_id)
        log.debug("Order path: %s", order_path)
        if not imain.is_collection(order_path):
            obj = self.init_endpoint()
            # Create the path and set permissions
            imain.create_collection_inheritable(order_path, obj.username)

        ##################
        # Does the zip already exist?
        zip_file_name = path.append_compress_extension(filename)
        zip_ipath = path.join(order_path, zip_file_name, return_str=True)
        if imain.is_dataobject(zip_ipath):
            json_input['parameters'] = {'status': 'exists'}
            return json_input

        ################
        # ASYNC
        if pids:
            log.info("Submit async celery task")
            task = CeleryExt.unrestricted_order.apply_async(
                args=[order_id, order_path, zip_file_name, json_input])
            log.info("Async job: %s", task.id)
            return self.return_async_id(task.id)

        # No PIDs: nothing to package, no task is submitted
        return {'status': 'enabled'}
예제 #10
0
    def post(self, batch_id):
        """
        Enable a batch and submit the download of its content.

        Creates the iRODS collection and the local folder of the batch if
        they do not exist yet, logs start/end of the enabling into the
        queue, then submits the asynchronous ``download_batch`` task on the
        ``ingestion`` queue.

        Args:
            batch_id: identifier of the batch to enable.

        Returns:
            The async task id, or an error response
            (``HTTP_SERVER_ERROR``) when the local folder cannot be created.
        """
        json_input = self.get_input()

        obj = self.init_endpoint()
        imain = self.get_main_irods_connection()

        batch_path = self.get_irods_batch_path(imain, batch_id)
        log.info("Batch irods path: %s", batch_path)
        local_path = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
        log.info("Batch local path: %s", local_path)

        # --- Create the batch folder if not exists ---

        # Log start (of enable) into RabbitMQ
        log_msg = prepare_message(self,
                                  json={'batch_id': batch_id},
                                  user=ingestion_user,
                                  log_string='start')
        log_into_queue(self, log_msg)

        ##################
        # Does it already exist?
        # Create the collection and set permissions in irods
        collection_created = False
        if not imain.is_collection(batch_path):
            imain.create_collection_inheritable(batch_path, obj.username)
            collection_created = True
        else:
            log.warning("Irods batch collection already exists")

        # Create the folder on filesystem
        if not path.file_exists_and_nonzero(local_path):

            # Create superdirectory and directory on file system:
            try:
                # TODO: REMOVE THIS WHEN path.create() has parents=True!
                import os
                superdir = os.path.join(MOUNTPOINT, INGESTION_DIR)
                if not os.path.exists(superdir):
                    log.debug('Creating %s...', superdir)
                    os.mkdir(superdir)
                    log.info('Created %s...', superdir)
                path.create(local_path, directory=True, force=True)
            except (FileNotFoundError, PermissionError) as e:
                err_msg = ('Could not create directory "%s" (%s)' %
                           (local_path, e))
                log.critical(err_msg)
                # Roll back only what this request created: a collection
                # that already existed must not be removed
                if collection_created:
                    log.info('Removing collection from irods (%s)',
                             batch_path)
                    imain.remove(batch_path, recursive=True, force=True)
                return self.send_errors(err_msg, code=hcodes.HTTP_SERVER_ERROR)

        else:
            log.debug("Batch path already exists on filesytem")

        # Log end (of enable) into RabbitMQ
        log_msg = prepare_message(self,
                                  status='enabled',
                                  user=ingestion_user,
                                  log_string='end')
        log_into_queue(self, log_msg)

        # --- Download the file into the batch folder ---

        task = CeleryExt.download_batch.apply_async(
            args=[batch_path, str(local_path), json_input],
            queue='ingestion',
            routing_key='ingestion')
        log.info("Async job: %s", task.id)
        return self.return_async_id(task.id)