def setUp(self):
    """Prepare the anonymous-iRODS test fixture.

    Steps:
      1. run the parent class setUp;
      2. authenticate the configured anonymous user against the auth
         endpoint and save the returned token;
      3. compute the iRODS paths used by the tests (a readable file path
         and a path the anonymous user has no permission on);
      4. upload a small test file to the readable path.
    """
    # Call the parent's setUp first so the app/client is initialized
    super().setUp()
    log.info("\n### Creating a test token (for ANONYMOUS IRODS user) ###")
    credentials = json.dumps({'username': self._anonymous_user})
    endpoint = self._auth_uri + self._auth_endpoint
    log.debug('*** Testing anonymous authentication on %s' % endpoint)
    r = self.app.post(endpoint, data=credentials)
    self.assertEqual(r.status_code, self._hcodes.HTTP_OK_BASIC)
    content = self.get_content(r)
    # Token is saved with the anonymous-user suffix so other tests can
    # retrieve it without re-authenticating
    self.save_token(content.get('token'), suffix=self._anonymous_user)

    # iRODS service configuration (zone, guest user, ...)
    self.irods_vars = detector.services_classes.get('irods').variables
    self._filename = 'some_file.txt'
    home_dirname = 'home'
    # Path readable by the guest user: /<zone>/home/<guest_user>/<file>
    self._ipath = str(
        path.join(path.root(), self.irods_vars.get('zone'), home_dirname,
                  self.irods_vars.get('guest_user'), self._filename))
    # Path the anonymous user must NOT be able to access
    self._no_permission_path = str(
        path.join(path.root(), self.irods_vars.get('zone'), home_dirname,
                  'nonexisting'))
    log.debug('*** Upload a test file: %s' % self._ipath)

    # Upload entity in test folder (authenticated via class-level header)
    endpoint = self._api_uri + self._register_endpoint + self._ipath
    r = self.app.put(
        endpoint,
        data=dict(file=(io.BytesIO(b"just a test"), self._filename)),
        headers=self.__class__.auth_header)
    self.assertEqual(r.status_code, self._hcodes.HTTP_OK_BASIC)
def publish_helper(self, icom, ipath, check_only=True, unpublish=False):
    """Walk every component of an iRODS path and check/apply publication.

    :param icom: iRODS connection handle
    :param ipath: full iRODS path whose components are inspected
    :param check_only: if True, stop and return False at the first
        component that is not already published
    :param unpublish: if True, remove the 'read' permission instead of
        granting it
    :return: False as soon as a component fails the published check
        (only meaningful with check_only=True); True otherwise

    NOTE(review): when check_only is True and a component IS already
    published, control still falls into the else branch and calls
    single_permission — i.e. a "check only" call can mutate
    permissions. Looks unintended; confirm before relying on it.
    """
    from utilities import path
    current_zone = icom.get_current_zone()
    ipath_steps = path.parts(ipath)
    current = ''
    for ipath_step in ipath_steps:
        # Rebuild the path incrementally, one component at a time
        current = path.join(current, ipath_step, return_str=True)
        # print("PUB STEP:", ipath_step, current, len(current))
        # to skip: root dir, zone and home
        if len(ipath_step) == 1 \
                or ipath_step == current_zone or ipath_step == 'home':
            continue
        # find out if already published
        check = self.single_path_check(icom, current_zone, str(current))
        # if only checking
        if check_only and not check:
            return False
        # otherwise you want to publish/unpublish this path
        else:
            if unpublish:
                # None drops the permission for this component
                self.single_permission(icom, current, permission=None)
            else:
                self.single_permission(icom, current, permission='read')
    return True
def get_download(self, imain, order_id, order_path, files,
                 restricted=False, index=None):
    """Build the public download route for an order's zip file.

    Creates an iRODS ticket for the zip, composes the download URL,
    and stores both on the data object as metadata (replacing any
    previous values). Returns None when the expected zip is not among
    ``files``; otherwise a dict with name, url and size.
    """
    zname = get_order_zip_file_name(order_id, restricted, index)
    if zname not in files:
        return None

    ipath = path.join(order_path, zname, return_str=True)
    log.debug("Zip irods path: %s", ipath)
    ticket = self.no_slash_ticket(imain, ipath)

    # ftype encodes restricted-flag + zip index (e.g. "10", "01", ...)
    ftype = ("1" if restricted else "0") \
        + ("0" if index is None else str(index))

    route = '%s%s/%s/%s/download/%s/c/%s' % (
        CURRENT_HTTPAPI_SERVER, API_URL, ORDERS_ENDPOINT,
        order_id, ftype, ticket)

    # If metadata already exists, remove them:
    # FIXME: verify if iticket_code is set and then invalidate it
    imain.remove_metadata(ipath, 'iticket_code')
    imain.remove_metadata(ipath, 'download')

    # Set the url as Metadata in the irods file
    imain.set_metadata(ipath, download=route)
    # TOFIX: we should add a database or cache to save this,
    # not irods metadata (known for low performances)
    imain.set_metadata(ipath, iticket_code=ticket)

    entry = files[zname]
    return {
        'name': zname,
        'url': route,
        'size': entry.get('content_length', 0),
    }
def delete(self):
    """Asynchronously delete orders via a Celery task.

    Reads the JSON input, resolves the iRODS order collection and the
    local order directory, dispatches the task, and returns its id.
    """
    payload = self.get_input()
    imain = self.get_main_irods_connection()

    irods_orders = self.get_irods_order_path(imain)
    fs_orders = str(path.join(MOUNTPOINT, ORDERS_DIR))
    log.debug("Order collection: %s", irods_orders)
    log.debug("Order path: %s", fs_orders)

    task = CeleryExt.delete_orders.apply_async(
        args=[irods_orders, fs_orders, payload])
    log.info("Async job: %s", task.id)
    return self.return_async_id(task.id)
def get(self, batch_id):
    """Report the status (and file listing) of an ingestion batch.

    Errors: 404 when the batch is missing/forbidden, 'resource' error
    on misconfiguration (more than one file where one is expected).
    Otherwise returns a dict with the batch id, a status label and
    the files found.
    """
    log.info("Batch request: %s", batch_id)
    # json = {'batch_id': batch_id}
    # msg = prepare_message(
    #     self, json=json, user=ingestion_user, log_string='start')
    # log_into_queue(self, msg)

    ########################
    # get irods session
    imain = self.get_main_irods_connection()
    batch_path = self.get_irods_batch_path(imain, batch_id)
    local_path = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
    log.info("Batch irods path: %s", batch_path)
    log.info("Batch local path: %s", local_path)

    batch_status, batch_files = self.get_batch_status(
        imain, batch_path, local_path)

    ########################
    if batch_status == MISSING_BATCH:
        return self.send_errors(
            "Batch '%s' not enabled or you have no permissions" % batch_id,
            code=hcodes.HTTP_BAD_NOTFOUND)

    if batch_status == BATCH_MISCONFIGURATION:
        log.error('Misconfiguration: %s files in %s (expected 1)',
                  len(batch_files), batch_path)
        return self.send_errors(
            "Misconfiguration for batch_id %s" % batch_id,
            code=hcodes.HTTP_BAD_RESOURCE)

    # Map internal status constants to API-facing labels
    labels = {
        NOT_FILLED_BATCH: 'not_filled',
        ENABLED_BATCH: 'enabled',
        PARTIALLY_ENABLED_BATCH: 'partially_enabled',
    }
    data = {'batch': batch_id}
    if batch_status in labels:
        data['status'] = labels[batch_status]
    data['files'] = batch_files
    return data
def delete(self):
    """Asynchronously delete ingestion batches via a Celery task.

    Reads the JSON input, resolves the iRODS batch collection and the
    local ingestion directory, dispatches the task on the 'ingestion'
    queue, and returns the task id.
    """
    payload = self.get_input()
    imain = self.get_main_irods_connection()

    irods_batches = self.get_irods_batch_path(imain)
    fs_batches = str(path.join(MOUNTPOINT, INGESTION_DIR))
    log.debug("Batch collection: %s", irods_batches)
    log.debug("Batch path: %s", fs_batches)

    task = CeleryExt.delete_batches.apply_async(
        args=[irods_batches, fs_batches, payload],
        queue='ingestion', routing_key='ingestion')
    log.info("Async job: %s", task.id)
    return self.return_async_id(task.id)
def put(self, batch_id, qc_name):
    """
    Launch a quality check inside a container.

    Validates the batch (must exist, be filled, and contain exactly
    one file), validates the required JSON input keys, then starts a
    Rancher container running the requested QC image with the batch
    directory and a freshly written JSON input file mounted in.

    :param batch_id: identifier of the ingestion batch to check
    :param qc_name: name of the quality check (selects the image)
    :return: a response dict (status 'executed' on success), or an
        error response on validation/launch failure
    """
    ###########################
    # get name from batch
    imain = self.get_main_irods_connection()
    batch_path = self.get_irods_batch_path(imain, batch_id)
    local_path = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
    log.info("Batch irods path: %s", batch_path)
    log.info("Batch local path: %s", local_path)

    batch_status, batch_files = self.get_batch_status(
        imain, batch_path, local_path)

    if batch_status == MISSING_BATCH:
        return self.send_errors(
            "Batch '%s' not found (or no permissions)" % batch_id,
            code=hcodes.HTTP_BAD_NOTFOUND)

    if batch_status == NOT_FILLED_BATCH:
        return self.send_errors(
            "Batch '%s' not yet filled" % batch_id,
            code=hcodes.HTTP_BAD_RESOURCE)

    if batch_status == BATCH_MISCONFIGURATION:
        log.error('Misconfiguration: %s files in %s (expected 1)',
                  len(batch_files), batch_path)
        return self.send_errors(
            "Misconfiguration for batch_id %s" % batch_id,
            code=hcodes.HTTP_BAD_RESOURCE)

    ###################
    # Parameters (and checks)
    envs = {}
    input_json = self.get_input()

    # TODO: backdoor check - remove me
    # 'eudat_backdoor' switches the image prefix and keeps the
    # container alive (sleep) instead of running the QC
    bd = input_json.pop('eudat_backdoor', False)
    if bd:
        im_prefix = 'eudat'
    else:
        im_prefix = 'maris'
    log.debug("Image prefix: %s", im_prefix)

    # input parameters to be passed to container;
    # all keys except 'parameters' itself are mandatory
    pkey = "parameters"
    param_keys = [
        "request_id", "edmo_code", "datetime",
        "api_function", "version", "test_mode", pkey
    ]
    for key in param_keys:
        if key == pkey:
            continue
        value = input_json.get(key, None)
        if value is None:
            return self.send_errors(
                'Missing JSON key: %s' % key,
                code=hcodes.HTTP_BAD_REQUEST)

    response = {
        'batch_id': batch_id,
        'qc_name': qc_name,
        'status': 'executed',
        'input': input_json,
    }

    ###################
    try:
        rancher = self.get_or_create_handle()
    except BaseException as e:
        log.critical(str(e))
        return self.send_errors(
            'Cannot establish a connection with Rancher',
            code=hcodes.HTTP_SERVER_ERROR)

    container_name = self.get_container_name(
        batch_id, qc_name, rancher._qclabel)

    # Duplicated quality checks on the same batch are not allowed
    container_obj = rancher.get_container_object(container_name)
    if container_obj is not None:
        log.error("Docker container %s already exists!", container_name)
        response['status'] = 'existing'
        code = hcodes.HTTP_BAD_CONFLICT
        return self.force_response(
            response, errors=[response['status']], code=code)

    docker_image_name = self.get_container_image(qc_name, prefix=im_prefix)

    ###########################
    # ## ENVS
    host_ingestion_path = self.get_ingestion_path_on_host(batch_id)
    container_ingestion_path = self.get_ingestion_path_in_container()
    envs['BATCH_DIR_PATH'] = container_ingestion_path

    from seadata.apis.commons.queue import QUEUE_VARS
    from seadata.apis.commons.cluster import CONTAINERS_VARS

    # Forward queue connection settings as LOGS_* env vars,
    # overriding user/password with the container credentials
    for key, value in QUEUE_VARS.items():
        if key in ['enable']:
            continue
        elif key == 'user':
            value = CONTAINERS_VARS.get('rabbituser')
        elif key == 'password':
            value = CONTAINERS_VARS.get('rabbitpass')
        envs['LOGS_' + key.upper()] = value

    # envs['DB_USERNAME'] = CONTAINERS_VARS.get('dbuser')
    # envs['DB_PASSWORD'] = CONTAINERS_VARS.get('dbpass')
    # envs['DB_USERNAME_EDIT'] = CONTAINERS_VARS.get('dbextrauser')
    # envs['DB_PASSWORD_EDIT'] = CONTAINERS_VARS.get('dbextrapass')

    # FOLDER inside /batches to store temporary json inputs
    # TODO: to be put into the configuration
    JSON_DIR = 'json_inputs'
    # Mount point of the json dir into the QC container
    QC_MOUNTPOINT = '/json'

    # Ensure <MOUNTPOINT>/<INGESTION_DIR>/json_inputs/<batch_id> exists
    json_path_backend = os.path.join(MOUNTPOINT, INGESTION_DIR, JSON_DIR)
    if not os.path.exists(json_path_backend):
        log.info("Creating folder %s", json_path_backend)
        os.mkdir(json_path_backend)
    json_path_backend = os.path.join(json_path_backend, batch_id)
    if not os.path.exists(json_path_backend):
        log.info("Creating folder %s", json_path_backend)
        os.mkdir(json_path_backend)

    # Timestamped file name avoids clashes between repeated launches
    json_input_file = "input.%s.json" % int(time.time())
    json_input_path = os.path.join(json_path_backend, json_input_file)
    with open(json_input_path, "w+") as f:
        f.write(json.dumps(input_json))

    # Host-side path of the json dir, mounted at QC_MOUNTPOINT
    json_path_qc = self.get_ingestion_path_on_host(JSON_DIR)
    json_path_qc = os.path.join(json_path_qc, batch_id)
    envs['JSON_FILE'] = os.path.join(QC_MOUNTPOINT, json_input_file)

    extra_params = {
        'dataVolumes': [
            "%s:%s" % (host_ingestion_path, container_ingestion_path),
            "%s:%s" % (json_path_qc, QC_MOUNTPOINT)
        ],
        'environment': envs
    }
    if bd:
        # Backdoor: keep the container alive instead of running the QC
        extra_params['command'] = ['/bin/sleep', '999999']
    # log.info(extra_params)

    ###########################
    errors = rancher.run(container_name=container_name,
                         image_name=docker_image_name,
                         private=True, extras=extra_params)

    if errors is not None:
        if isinstance(errors, dict):
            edict = errors.get('error', {})
            # This case should never happens, since already verified before
            if edict.get('code') == 'NotUnique':
                response['status'] = 'existing'
                code = hcodes.HTTP_BAD_CONFLICT
            else:
                response['status'] = 'could NOT be started'
                response['description'] = edict
                code = hcodes.HTTP_SERVER_ERROR
        else:
            response['status'] = 'failure'
            code = hcodes.HTTP_SERVER_ERROR
        return self.force_response(
            response, errors=[response['status']], code=code)

    return response
def get(self, order_id, ftype, code):
    """
    Downloading (not authenticated).

    Validates the supplied ticket code against the 'iticket_code'
    metadata stored on the order's zip data object, then streams the
    file back through an anonymous iRODS session using the ticket.

    :param order_id: order identifier
    :param ftype: encoded file type (restricted flag + zip index),
        mapped to the actual zip file name
    :param code: iRODS ticket code supplied by the client
    :return: a streamed file response, or a 404-style error when the
        order/zip is missing or the code does not match
    """
    log.info("Order request: %s (code '%s')", order_id, code)
    json = {'order_id': order_id, 'code': code}
    msg = prepare_message(self, json=json, user='******', log_string='start')
    log_into_queue(self, msg)
    log.info("DOWNLOAD DEBUG 1: %s (code '%s')", order_id, code)

    ##################
    # Main (authenticated) iRODS session, used only for metadata checks
    # imain = self.get_service_instance(service_name='irods')
    imain = self.get_main_irods_connection()
    log.info("DOWNLOAD DEBUG 2: %s (code '%s')", order_id, code)
    order_path = self.get_irods_order_path(imain, order_id)
    log.info("DOWNLOAD DEBUG 3: %s (code '%s') - %s",
             order_id, code, order_path)

    zip_file_name = self.get_filename_from_type(order_id, ftype)
    if zip_file_name is None:
        return self.send_errors("Invalid file type %s" % ftype)

    zip_ipath = path.join(order_path, zip_file_name, return_str=True)

    error = "Order '%s' not found (or no permissions)" % order_id
    log.debug("Checking zip irods path: %s", zip_ipath)
    log.info("DOWNLOAD DEBUG 4: %s (code '%s') - %s",
             order_id, code, zip_ipath)
    if not imain.is_dataobject(zip_ipath):
        log.error("File not found %s", zip_ipath)
        return self.send_errors({order_id: error},
                                code=hcodes.HTTP_BAD_NOTFOUND)
    log.info("DOWNLOAD DEBUG 5: %s (code '%s')", order_id, code)

    # TOFIX: we should use a database or cache to save this,
    # not irods metadata (known for low performances)
    metadata, _ = imain.get_metadata(zip_ipath)
    log.info("DOWNLOAD DEBUG 6: %s (code '%s')", order_id, code)
    iticket_code = metadata.get('iticket_code')
    log.info("DOWNLOAD DEBUG 7: %s (code '%s')", order_id, code)

    # Stored code is URL-quoted, so quote the client's code to compare
    encoded_code = urllib.parse.quote_plus(code)
    log.info("DOWNLOAD DEBUG 8: %s (code '%s')", order_id, code)
    if iticket_code != encoded_code:
        log.error("iticket code does not match %s", zip_ipath)
        return self.send_errors({order_id: error},
                                code=hcodes.HTTP_BAD_NOTFOUND)

    # NOTE: very important!
    # use anonymous to get the session here
    # because the ticket supply breaks the iuser session permissions
    icom = self.get_service_instance(service_name='irods',
                                     user='******', password='******',
                                     authscheme='credentials')
    log.info("DOWNLOAD DEBUG 9: %s (code '%s')", order_id, code)

    # obj = self.init_endpoint()
    # icom = obj.icommands
    icom.ticket_supply(code)
    log.info("DOWNLOAD DEBUG 10: %s (code '%s')", order_id, code)

    if not icom.test_ticket(zip_ipath):
        log.error("Invalid iticket code %s", zip_ipath)
        return self.send_errors({order_id: "Invalid code"},
                                code=hcodes.HTTP_BAD_NOTFOUND)

    # # TODO: push pdonorio/prc
    # tickets = imain.list_tickets()
    # print(tickets)

    # iticket mod "$TICKET" add user anonymous
    # iticket mod "$TICKET" uses 1
    # iticket mod "$TICKET" expire "2018-03-23.06:50:00"

    # ##################
    # response = {order_id: 'valid'}
    # return self.force_response(response)

    headers = {
        'Content-Transfer-Encoding': 'binary',
        'Content-Disposition': "attachment; filename=%s" % zip_file_name,
    }
    msg = prepare_message(self, json=json, log_string='end', status='sent')
    log_into_queue(self, msg)
    log.info("DOWNLOAD DEBUG 11: %s (code '%s')", order_id, code)
    return icom.stream_ticket(zip_ipath, headers=headers)
def post(self):
    """
    Create an (unrestricted) order.

    Validates the JSON input ('parameters' with an 'order_number'),
    normalizes the zip file name, ensures the iRODS order collection
    exists, and — when PIDs are provided — dispatches an async Celery
    task to build the zip. Returns early if the zip already exists.
    """
    ##################
    log.debug('POST request on orders')
    json_input = self.get_input()
    msg = prepare_message(self, json=json_input, log_string='start')
    log_into_queue(self, msg)

    ##################
    main_key = 'parameters'
    params = json_input.get(main_key, {})
    if len(params) < 1:
        error = "'%s' missing" % main_key
        return self.send_errors(error, code=hcodes.HTTP_BAD_REQUEST)

    ##################
    key = 'order_number'
    order_id = params.get(key)
    if order_id is None:
        error = "Order ID '%s': missing" % key
        return self.send_errors(error, code=hcodes.HTTP_BAD_REQUEST)
    else:
        order_id = str(order_id)

    # ##################
    # Get filename from json input. But it has to follow a
    # specific pattern, so we ignore client input if it does not...
    filename = "order_%s_unrestricted" % order_id
    key = 'file_name'
    if key in params and not params[key] == filename:
        log.warn('Client provided wrong filename (%s), will use: %s',
                 params[key], filename)
    params[key] = filename

    ##################
    # PIDS: can be empty if restricted
    key = 'pids'
    pids = params.get(key, [])

    ##################
    # Create the path
    log.info("Order request: %s", order_id)
    # imain = self.get_service_instance(service_name='irods')
    imain = self.get_main_irods_connection()
    order_path = self.get_irods_order_path(imain, order_id)
    log.debug("Order path: %s", order_path)
    if not imain.is_collection(order_path):
        obj = self.init_endpoint()
        # Create the path and set permissions
        imain.create_collection_inheritable(order_path, obj.username)

    ##################
    # Does the zip already exists?
    # zip_file_name = path.append_compress_extension(order_id)
    zip_file_name = path.append_compress_extension(filename)
    zip_ipath = path.join(order_path, zip_file_name, return_str=True)
    if imain.is_dataobject(zip_ipath):
        # give error here
        # return {order_id: 'already exists'}
        # json_input['status'] = 'exists'
        json_input['parameters'] = {'status': 'exists'}
        return json_input

    ################
    # ASYNC: delegate the zip creation to Celery when PIDs are given
    if len(pids) > 0:
        log.info("Submit async celery task")
        task = CeleryExt.unrestricted_order.apply_async(
            args=[order_id, order_path, zip_file_name, json_input])
        log.info("Async job: %s", task.id)
        return self.return_async_id(task.id)

    # ################
    # msg = prepare_message(self, log_string='end')
    # # msg = prepare_message(self, log_string='end', status='created')
    # msg['parameters'] = {
    #     "request_id": msg['request_id'],
    #     "zipfile_name": params['file_name'],
    #     "zipfile_count": 1,
    # }
    # log_into_queue(self, msg)

    # ################
    # # return {order_id: 'created'}
    # # json_input['status'] = 'created'
    # json_input['request_id'] = msg['request_id']
    # json_input['parameters'] = msg['parameters']
    # if len(errors) > 0:
    #     json_input['errors'] = errors

    # # call Import manager to notify
    # api = API()
    # api.post(json_input)

    return {'status': 'enabled'}
def post(self, batch_id):
    """
    Enable an ingestion batch and start downloading its file.

    Creates the batch collection in iRODS (with inherited permissions)
    and the matching directory on the local filesystem, rolling back
    the iRODS collection if the local creation fails. Finally submits
    the async download task on the 'ingestion' queue.

    :param batch_id: identifier of the batch to enable
    :return: the async task id, or a server error on filesystem failure
    """
    json_input = self.get_input()

    obj = self.init_endpoint()
    imain = self.get_main_irods_connection()

    batch_path = self.get_irods_batch_path(imain, batch_id)
    log.info("Batch irods path: %s", batch_path)
    local_path = path.join(MOUNTPOINT, INGESTION_DIR, batch_id)
    log.info("Batch local path: %s", local_path)

    """
    Create the batch folder if not exists
    """

    # Log start (of enable) into RabbitMQ
    log_msg = prepare_message(self,
                              json={'batch_id': batch_id},
                              user=ingestion_user,
                              log_string='start')
    log_into_queue(self, log_msg)

    ##################
    # Does it already exist?
    # Create the collection and set permissions in irods
    if not imain.is_collection(batch_path):
        imain.create_collection_inheritable(batch_path, obj.username)
    else:
        log.warning("Irods batch collection already exists")

    # Create the folder on filesystem
    if not path.file_exists_and_nonzero(local_path):
        # Create superdirectory and directory on file system:
        try:
            # TODO: REMOVE THIS WHEN path.create() has parents=True!
            import os
            superdir = os.path.join(MOUNTPOINT, INGESTION_DIR)
            if not os.path.exists(superdir):
                log.debug('Creating %s...', superdir)
                os.mkdir(superdir)
                log.info('Created %s...', superdir)
            path.create(local_path, directory=True, force=True)
        except (FileNotFoundError, PermissionError) as e:
            err_msg = ('Could not create directory "%s" (%s)'
                       % (local_path, e))
            log.critical(err_msg)
            # Roll back: remove the irods collection created above so
            # the batch is not left half-enabled
            log.info('Removing collection from irods (%s)' % batch_path)
            imain.remove(batch_path, recursive=True, force=True)
            return self.send_errors(err_msg,
                                    code=hcodes.HTTP_SERVER_ERROR)
    else:
        log.debug("Batch path already exists on filesytem")

    # Log end (of enable) into RabbitMQ
    log_msg = prepare_message(self,
                              status='enabled',
                              user=ingestion_user,
                              log_string='end')
    log_into_queue(self, log_msg)

    """
    Download the file into the batch folder
    """
    task = CeleryExt.download_batch.apply_async(
        args=[batch_path, str(local_path), json_input],
        queue='ingestion', routing_key='ingestion')
    log.info("Async job: %s", task.id)

    return self.return_async_id(task.id)