def test_determines_for_which_jobs_check_status_1(self):
    """
    Verifies that the daemon selects the right LSF job ids for a status check.

    After creating the fixture jobs, the expected ids are obtained with a direct query (jobs assigned to the
    configured LSF host, whose status is neither ERROR nor FINISHED, and that belong to the configured run
    environment) and compared with what daemon.get_lsf_job_ids_to_check() returns.
    """
    self.create_test_jobs_2()
    print('here 1')
    current_run_environment = RUN_CONFIG.get('run_env')
    print('current_run_environment: ', current_run_environment)

    with self.flask_app.app_context():
        my_lsf_host = RUN_CONFIG.get('lsf_submission')['lsf_host']
        job_model = delayed_job_models.DelayedJob
        statuses = delayed_job_models.JobStatuses

        expected_jobs_query = job_model.query.filter(
            and_(
                job_model.lsf_host == my_lsf_host,
                job_model.status.notin_([statuses.ERROR, statuses.FINISHED]),
                job_model.run_environment == current_run_environment
            )
        )

        # The query is iterated once per list, exactly as many times as it is needed.
        expected_lsf_ids = [job.lsf_job_id for job in expected_jobs_query]
        expected_job_ids = [job.id for job in expected_jobs_query]

        lsf_ids_got = daemon.get_lsf_job_ids_to_check()

        self.assertListEqual(
            expected_lsf_ids,
            lsf_ids_got,
            msg=f'The jobs for which to check the status were not created '
                f'correctly! jobs must be {expected_job_ids}'
        )
def save_custom_statistics_download_job(job_id, time_taken, desired_format, file_size, es_index, es_query,
                                        total_items):
    """
    Saves the custom statistics for the download job

    :param job_id: id of the job, just as a test that the job exists
    :param time_taken: Time in seconds taken to do the download
    :param desired_format: Format requested for the download
    :param file_size: The resulting file size of the job in bytes
    :param es_index: Name of the index for which the download was generated
    :param es_query: Query used for the download
    :param total_items: The number of items saved in the file
    :return: a dict whose 'operation_result' key describes the outcome of the operation
    """
    check_if_job_exists(job_id)

    # datetime.utcnow() returns a naive datetime and .timestamp() interprets naive datetimes as LOCAL time, so
    # utcnow().timestamp() is shifted by the UTC offset on any host that does not run in UTC. A timezone-aware
    # "now" always gives the real epoch value.
    request_date = datetime.datetime.now(datetime.timezone.utc).timestamp() * 1000

    doc = {
        'desired_format': desired_format,
        'download_id': job_id,
        'es_index': es_index,
        'es_query': es_query,
        'file_size': file_size,
        'host': 'delayed_jobs_k8s',
        'is_new': False,
        'request_date': request_date,  # milliseconds since the epoch
        'run_env_type': RUN_CONFIG.get('run_env'),
        'time_taken': time_taken,
        'total_items': total_items
    }

    index_name = RUN_CONFIG.get('job_statistics').get('download_job_statistics_index')
    statistics_saver.save_record_to_elasticsearch(doc, index_name)

    return {'operation_result': 'Statistics successfully saved!'}
def save_shortened_url(long_url, url_hash):
    """
    Stores the document describing a shortened url in elasticsearch, unless the url shortening configuration
    requests a dry run, in which case the document is only logged.

    :param long_url: full url to save
    :param url_hash: hash of the url
    :return: the datetime at which the shortened url expires
    """
    shortening_config = RUN_CONFIG.get('url_shortening')

    now = datetime.utcnow()
    expiration_date = now + timedelta(days=shortening_config.get('days_valid'))
    index_name = shortening_config.get('index_name')

    # NOTE(review): both timestamps come from naive utcnow() values, which .timestamp() reads as local time;
    # confirm the hosts run in UTC or that the readers of these fields compute their values the same way.
    document = {
        'long_url': long_url,
        'hash': url_hash,
        'expires': expiration_date.timestamp() * 1000,
        'creation_date_2': int(now.timestamp() * 1000)
    }

    if shortening_config.get('dry_run'):
        app_logging.debug(f'Dry run is true, not saving the document {document} to the index {index_name}')
        return expiration_date

    es_data.save_es_doc(index_name, document, refresh='wait_for')
    return expiration_date
def create_test_jobs_1(self):
    """
    Creates and saves 4 test jobs, with consecutive lsf job ids starting at 0:
    - 2 jobs in finished state, one assigned to the configured lsf host and one to 'another_host'
    - 2 jobs in error state, one assigned to the configured lsf host and one to 'another_host'
    The output directory of every job is created as well.
    """
    run_environment = RUN_CONFIG.get('run_env')
    lsf_host = RUN_CONFIG.get('lsf_submission')['lsf_host']

    statuses = delayed_job_models.JobStatuses
    # Same order as two nested loops: statuses in the outer one, hosts in the inner one.
    scenarios = [
        (status, assigned_host)
        for status in [statuses.FINISHED, statuses.ERROR]
        for assigned_host in [lsf_host, 'another_host']
    ]

    with self.flask_app.app_context():
        for lsf_job_id, (status, assigned_host) in enumerate(scenarios):
            job = delayed_job_models.DelayedJob(
                id=f'Job-{assigned_host}-{status}',
                type='TEST',
                lsf_job_id=lsf_job_id,
                status=status,
                lsf_host=assigned_host,
                run_environment=run_environment,
                created_at=datetime.utcnow(),
                started_at=datetime.utcnow() + timedelta(seconds=1),
                finished_at=datetime.utcnow() + timedelta(seconds=2)
            )
            job.output_dir_path = job_submission_service.get_job_output_dir_path(job)
            os.makedirs(job.output_dir_path, exist_ok=True)
            delayed_job_models.save_job(job)
def record_url_was_shortened():
    """
    Records that an url was shortened, by saving an 'URL_SHORTENED' event in the url shortening statistics index.
    """
    # Function-scope import, the module only needs timezone here.
    from datetime import timezone

    # datetime.utcnow() is naive and .timestamp() interprets naive datetimes as LOCAL time, so the previous
    # utcnow().timestamp() was shifted by the UTC offset on hosts not running in UTC. An aware "now" is always
    # converted to the real epoch value.
    request_date = datetime.now(timezone.utc).timestamp() * 1000

    doc = {
        "event": "URL_SHORTENED",
        "run_env_type": RUN_CONFIG.get('run_env'),
        "host": 'es_proxy_api_k8s',
        "request_date": request_date,  # milliseconds since the epoch
    }

    index_name = RUN_CONFIG.get('url_shortening').get('statistics_index_name')
    save_record_to_elasticsearch(doc, index_name)
def record_expired_urls_were_deleted():
    """
    Records that the expired urls were deleted, by saving an 'EXPIRED_URLS_DELETED' event in the url shortening
    statistics index.
    """
    # Function-scope import, the module only needs timezone here.
    from datetime import timezone

    # datetime.utcnow() is naive and .timestamp() interprets naive datetimes as LOCAL time, so the previous
    # utcnow().timestamp() was shifted by the UTC offset on hosts not running in UTC. An aware "now" is always
    # converted to the real epoch value.
    request_date = datetime.now(timezone.utc).timestamp() * 1000

    doc = {
        "event": "EXPIRED_URLS_DELETED",
        "run_env_type": RUN_CONFIG.get('run_env'),
        "host": 'es_proxy_api_k8s',
        "request_date": request_date,  # milliseconds since the epoch
    }

    index_name = RUN_CONFIG.get('url_shortening').get('statistics_index_name')
    save_record_to_elasticsearch(doc, index_name)
def get_json():
    """
    Loads app/swagger/swagger.yaml (relative to the current working directory), sets its 'host' and 'basePath'
    from the run configuration and returns the result as a json response.

    :return: the json response with the swagger description
    """
    swagger_file_path = Path().absolute() / 'app' / 'swagger' / 'swagger.yaml'

    with open(swagger_file_path, 'r') as swagger_file:
        description = yaml.safe_load(swagger_file)

    # These two values depend on where the server is deployed, so they come from the configuration.
    description['host'] = RUN_CONFIG.get('server_public_host')
    description['basePath'] = RUN_CONFIG.get('base_path')

    return jsonify(description)
def check_jobs_status(delete_lock_after_finishing=True):
    """
    The main function of this module. Looks for the jobs whose status must be checked and checks it in LSF.

    A lock for the configured LSF host is set before checking; if a lock already exists nothing is checked.
    Once the lock has been set by this call, it is released on every way out (including errors) when
    delete_lock_after_finishing is True.

    :param delete_lock_after_finishing: determines if explicitly deletes the lock after finishing
    :return: (sleep_time, jobs_were_checked) the amount of seconds to wait for the next run and if the jobs were
    checked or not. When the status script fails, the error is printed and (sleep_time, False) is returned.
    """
    lsf_config = RUN_CONFIG.get('lsf_submission')
    current_lsf_host = lsf_config['lsf_host']
    my_hostname = socket.gethostname()

    status_agent_config = RUN_CONFIG.get('status_agent')
    min_sleep_time = status_agent_config.get('min_sleep_time')
    max_sleep_time = status_agent_config.get('max_sleep_time')
    sleep_time = random.uniform(min_sleep_time, max_sleep_time)

    existing_lock = locks.get_lock_for_lsf_host(current_lsf_host)
    if existing_lock is not None:
        print(f'I ({my_hostname}) found a lock, waiting {sleep_time} seconds before checking again')
        return sleep_time, False

    print(f'Locking LSF status check for {current_lsf_host}, I am {my_hostname}')
    locks.set_lsf_lock(current_lsf_host, my_hostname)

    try:
        print('Looking for jobs to check...')
        lsf_job_ids_to_check = get_lsf_job_ids_to_check()
        print(f'lsf_job_ids_to_check: {lsf_job_ids_to_check}')

        if not lsf_job_ids_to_check:
            return sleep_time, True

        script_path = prepare_job_status_check_script(lsf_job_ids_to_check)
        must_run_script = RUN_CONFIG.get('run_status_script', True)
        if not must_run_script:
            print('Not running script because run_status_script is False')
            return sleep_time, False

        try:
            script_output = get_status_script_output(script_path)
            os.remove(script_path)  # Remove the script after running so it doesn't fill up the NFS
            print(f'deleted script: {script_path}')
            parse_bjobs_output(script_output)
        except JobStatusDaemonError as error:
            print(error)
            # Previously this path fell off the end of the function and returned None, which breaks callers
            # that unpack the documented (sleep_time, jobs_were_checked) tuple.
            return sleep_time, False

        return sleep_time, True
    finally:
        # Single release point: the lock used to be left in place when the status check failed.
        if delete_lock_after_finishing:
            locks.delete_lsf_lock(current_lsf_host)
def save_record_to_elasticsearch(doc, index_name):
    """
    Indexes a statistics record in elasticsearch. When 'dry_run' is enabled in the 'job_statistics'
    configuration, the record is only logged and nothing is sent.

    :param doc: document (dict) to index
    :param index_name: name of the index in which the document is saved
    """
    statistics_config = RUN_CONFIG.get('job_statistics', {})
    dry_run = statistics_config.get('dry_run', False)
    # The host is only used for the debug message below.
    es_host = RUN_CONFIG.get('elasticsearch', {}).get('host')

    if dry_run:
        app_logging.debug(f'Not actually sending the record to the statistics (dry run): {doc}')
        return

    app_logging.debug(
        f'Sending the following record to the statistics: {doc} '
        f'index name: {index_name} es_host: {es_host}'
    )
    result = ES.index(index=index_name, body=doc, doc_type='_doc')
    app_logging.debug(f'Result {result}')
def submit_job_to_lsf(job):
    """
    Runs the script that submits the job to LSF, saves the stdout and stderr of the script next to it
    (submission.out and submission.err), and stores in the job the LSF job id and the QUEUED status.

    Nothing is run when 'run_jobs' is False in the configuration.

    :param job: DelayedJob object
    :raises JobSubmissionError: when the submission script finishes with a return code different from 0
    """
    submit_file_path = get_job_submission_script_file_path(job)
    submission_dir = Path(submit_file_path).parent
    submission_output_path = submission_dir.joinpath('submission.out')
    submission_error_path = submission_dir.joinpath('submission.err')

    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']

    # The arguments are passed as a list instead of splitting a command string by spaces, so a script path or
    # a key path that contains spaces reaches the script as a single argument.
    command_args = [str(submit_file_path), str(id_rsa_path)]
    run_command = ' '.join(command_args)
    app_logging.debug(f'Going to run job submission script, command: {run_command}')

    must_run_jobs = RUN_CONFIG.get('run_jobs', True)
    if not must_run_jobs:
        app_logging.debug('Not submitting jobs because run_jobs is False')
        return

    submission_process = subprocess.run(command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    app_logging.debug(f'Submission STD Output: \n {submission_process.stdout}')
    app_logging.debug(f'Submission STD Error: \n {submission_process.stderr}')

    # Both streams are bytes, so the files are written in binary mode.
    with open(submission_output_path, 'wb') as submission_out_file:
        submission_out_file.write(submission_process.stdout)

    with open(submission_error_path, 'wb') as submission_err_file:
        submission_err_file.write(submission_process.stderr)

    return_code = submission_process.returncode
    app_logging.debug(f'submission return code was: {return_code}')

    if return_code != 0:
        raise JobSubmissionError(
            'There was an error when running the job submission script! Please check the logs'
        )

    # NOTE(review): str() of bytes produces the "b'...'" representation, not the decoded text; it is kept as it
    # was because get_lsf_job_id may rely on that form - confirm before changing it.
    lsf_job_id = get_lsf_job_id(str(submission_process.stdout))
    job.lsf_job_id = lsf_job_id
    job.status = delayed_job_models.JobStatuses.QUEUED
    delayed_job_models.save_job(job)
    app_logging.debug(f'LSF Job ID is: {lsf_job_id}')
def test_gets_all_configured_properties(self):
    """
    Checks that the groups configuration manager returns every property configured for the activity index.
    The comparison ignores the order of the properties.
    """
    manager = get_group_configuration_instance()
    index_name = f"{RUN_CONFIG.get('es_index_prefix')}activity"

    expected_props = [
        '_metadata.organism_taxonomy.oc_id',
        '_metadata.assay_data.assay_subcellular_fraction',
        '_metadata.activity_generated.short_data_validity_comment',
        '_metadata.assay_data.assay_cell_type',
        '_metadata.assay_data.assay_organism',
        '_metadata.assay_data.assay_tissue'
    ]

    props_got = manager.get_list_of_configured_properties(index_name)

    self.assertEqual(sorted(props_got), sorted(expected_props), msg='The properties list is not correct!')
def get_status_script_output(script_path):
    """
    Runs the status script and returns a text with the output obtained. If the script fails, its stdout and
    stderr are saved next to it (<script_path>.out and <script_path>.err) and an exception is raised.

    :param script_path: path of the script
    :return: the text output of stdout
    :raises JobStatusDaemonError: when the script finishes with a return code different from 0
    """
    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']

    # The arguments are passed as a list instead of splitting a command string by spaces, so a script path or
    # a key path that contains spaces reaches the script as a single argument.
    command_args = [str(script_path), str(id_rsa_path)]
    run_command = ' '.join(command_args)
    print(f'Going to run job status script, command: {run_command}')

    status_check_process = subprocess.run(command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(f'Output: \n {status_check_process.stdout}')
    print(f'Error: \n {status_check_process.stderr}')

    return_code = status_check_process.returncode
    print(f'script return code was: {return_code}')

    if return_code == 0:
        return status_check_process.stdout.decode()

    # Keep what the script printed so the failure can be diagnosed later.
    status_output_path = f'{script_path}.out'
    status_error_path = f'{script_path}.err'

    with open(status_output_path, 'wb') as status_out_file:
        status_out_file.write(status_check_process.stdout)

    with open(status_error_path, 'wb') as status_err_file:
        status_err_file.write(status_check_process.stderr)

    raise JobStatusDaemonError('There was an error when running the job status script! Please check the logs')
def prepare_job_status_check_script(lsf_job_ids):
    """
    Writes the script that asks LSF for the status of the jobs, by filling the template
    templates/get_jobs_status.sh (relative to the current working directory) with the job ids and the
    configured LSF user and host. The script is made executable.

    :param lsf_job_ids: the list of job ids for which check the status
    :return: the final path of the script that was created
    """
    template_path = os.path.join(Path().absolute(), 'templates', 'get_jobs_status.sh')
    with open(template_path, 'r') as template_file:
        script_template = template_file.read()

    lsf_config = RUN_CONFIG.get('lsf_submission')
    script_text = script_template.format(
        LSF_JOB_IDS=' '.join(map(str, lsf_job_ids)),
        LSF_USER=lsf_config['lsf_user'],
        LSF_HOST=lsf_config['lsf_host']
    )

    status_script_path = get_check_job_status_script_path()
    status_script_path.parent.mkdir(parents=True, exist_ok=True)
    with open(status_script_path, 'w') as status_script_file:
        status_script_file.write(script_text)

    print(f'created script: {status_script_path}')

    # Add the execution permission for the owner on top of the permissions the file already has.
    current_mode = os.stat(status_script_path).st_mode
    os.chmod(status_script_path, current_mode | stat.S_IEXEC)

    return status_script_path
def get_items_with_context(index_name, raw_es_query, raw_context, raw_contextual_sort_data='{}'):
    """
    Queries elasticsearch with the context injected into the query, and adds the context values to the response.

    :param index_name: name of the index to query
    :param raw_es_query: es_query stringified
    :param raw_context: context dict stringified, it must include the key 'context_id'
    :param raw_contextual_sort_data: contextual sort description stringified; None is treated as no sort data
    :return: a tuple (es_response, metadata), the items in the es_query with the context given in the context
    description, and a dict with 'total_results' and 'max_results_injected'
    """
    context_dict = json.loads(raw_context)
    context, total_results = context_loader.get_context(context_dict)

    id_properties = es_mappings.get_id_properties_for_index(index_name)

    # An index of the context is built so the access to its items is faster.
    context_index = context_loader.load_context_index(context_dict['context_id'], id_properties, context)

    contextual_sort_data = {} if raw_contextual_sort_data is None else json.loads(raw_contextual_sort_data)

    search_data = get_search_data_with_injections(raw_es_query, contextual_sort_data, id_properties,
                                                  total_results, context_index)

    # The search data goes through a json dump/load round trip before being sent, as it always did.
    serialised_search_data = json.dumps(search_data)
    es_response = es_data.get_es_response(index_name, json.loads(serialised_search_data))
    add_context_values_to_response(es_response, context_index)

    metadata = {
        'total_results': len(context_index),
        'max_results_injected': RUN_CONFIG.get('filter_query_max_clauses')
    }
    return es_response, metadata
def do_multisearch(body):
    """
    Runs a multisearch in elasticsearch, using the cache when the same body was requested before. In both cases
    it records whether the response was cached and the time taken.

    :param body: body of the multisearch
    :return: the result of the multisearch
    """
    cache_key = get_multisearch_cache_key(body)
    app_logging.debug(f'cache_key: {cache_key}')

    lookup_started_at = time.time()
    cached_result = cache.fail_proof_get(key=cache_key)

    if cached_result is not None:
        time_taken = time.time() - lookup_started_at
        app_logging.debug('results were cached')
        record_that_response_was_cached('multisearch', {'query': body}, time_taken)
        return cached_result

    app_logging.debug('results were not cached')

    # Only the time spent in elasticsearch is reported for a cache miss.
    search_started_at = time.time()
    result = ES.msearch(body=body)
    time_taken = time.time() - search_started_at
    record_that_response_not_cached('multisearch', {'query': body}, time_taken)

    cache.fail_proof_set(key=cache_key, value=result, timeout=RUN_CONFIG.get('es_proxy_cache_seconds'))
    return result
def test_gets_config_for_a_list_of_properties(self):
    """
    Checks that the configuration returned for a list of properties comes in the same order as the list, and
    that every configuration has the expected index, id, type and labels.
    """
    manager = get_group_configuration_instance()
    index_name = f"{RUN_CONFIG.get('es_index_prefix')}activity"

    props = [
        '_metadata.activity_generated.short_data_validity_comment',
        '_metadata.assay_data.assay_cell_type'
    ]
    # (label, label_mini) expected for each one of the properties above
    expected_labels = [
        ('My custom label', 'My cstm lbl'),
        ('Assay Data Cell Type', 'Assay Data Cell Type')
    ]

    configs_got = manager.get_config_for_props_list(index_name, props)

    for position, (label_must_be, label_mini_must_be) in enumerate(expected_labels):
        config = configs_got[position]
        self.assertEqual(config['index_name'], index_name)
        self.assertEqual(config['prop_id'], props[position])
        self.assertTrue(config['aggregatable'])
        self.assertEqual(config['type'], 'string')
        self.assertEqual(config['label'], label_must_be)
        self.assertEqual(config['label_mini'], label_mini_must_be)
def parse_search(search_term, es_indexes, selected_es_index):
    """
    Parses a search term and builds the best elasticsearch queries for it. The result is cached, using as key
    the three parameters.

    :param search_term: Term to parse
    :param es_indexes: indexes in which the search will be done, separated by a comma
    :param selected_es_index: es index to focus on
    :return: a dict with the keys 'parsed_query', 'best_es_base_queries' and 'sorted_indexes_by_score'
    """
    cache_key = f'{search_term}-{es_indexes}-{selected_es_index}'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_response = cache.fail_proof_get(key=cache_key)
    if cached_response is not None:
        app_logging.debug('results were cached')
        return cached_response

    app_logging.debug('results were not cached')

    parsed_query = parse_query_str(search_term)
    best_queries, sorted_indexes_by_score = QueryBuilder.get_best_es_query(
        parsed_query, es_indexes.split(','), selected_es_index
    )

    response_dict = {
        'parsed_query': parsed_query,
        'best_es_base_queries': best_queries,
        'sorted_indexes_by_score': sorted_indexes_by_score
    }

    cache.fail_proof_set(key=cache_key, value=response_dict, timeout=RUN_CONFIG.get('es_proxy_cache_seconds'))
    return response_dict
def test_parses_the_output_of_bjobs_finished_job(self):
    """
    Generates mock jobs, then sends a mock output to the function to test that it interpreted the output
    accordingly. This test focuses on a job that switched to finished state: its status must be FINISHED and
    its expiration date must be the finish date plus the configured 'job_expiration_days'.
    """
    self.create_test_jobs_0()

    sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_1.txt')

    with self.flask_app.app_context():
        daemon.parse_bjobs_output(sample_output)

        # job with lsf id 4 should be in finished state now
        lsf_job_id = 4
        job = delayed_job_models.get_job_by_lsf_id(lsf_job_id)

        status_got = job.status
        status_must_be = delayed_job_models.JobStatuses.FINISHED
        self.assertEqual(status_got, status_must_be, msg='The status of the job was not changed accordingly!')

        finished_time = job.finished_at
        delta = timedelta(days=RUN_CONFIG.get('job_expiration_days'))
        expiration_date_must_be = finished_time + delta
        expiration_date_got = job.expires_at

        # assertEquals is a deprecated alias that no longer exists in Python 3.12, assertEqual is the supported
        # name and does the same comparison.
        self.assertEqual(expiration_date_got, expiration_date_must_be,
                         msg='the job expiration date was not calculated correctly!')
def test_gets_config_for_a_contextual_property(self):
    """
    Checks the configuration obtained for '_context.similarity' in the molecule index, a property that is both
    virtual and contextual.
    """
    manager = get_property_configuration_instance()
    index_name = f"{RUN_CONFIG.get('es_index_prefix')}molecule"
    prop_id = '_context.similarity'

    config_got = manager.get_config_for_prop(index_name, prop_id)

    self.assertEqual(config_got['prop_id'], prop_id, 'The prop_id was not set up properly!')
    self.assertFalse(config_got['aggregatable'])
    self.assertTrue(config_got['sortable'])

    expected_values = [('type', 'double'), ('label', 'Similarity'), ('label_mini', 'Similarity')]
    for key, value_must_be in expected_values:
        self.assertEqual(config_got[key], value_must_be)

    self.assertEqual(config_got['is_virtual'], True, 'This is a virtual property!')
    self.assertEqual(config_got['is_contextual'], True, 'This is a contextual property!')
def get_or_create(job_type, job_params, docker_image_url, input_files_hashes=None):
    """
    Based on the type and the parameters given, returns a job if it exists, if not it creates it and returns it.
    The id of the job is generated from all the parameters of this function.

    :param job_type: type of job to get or create
    :param job_params: parameters of the job, they are saved as json with sorted keys
    :param docker_image_url: url of the docker image used to run the job
    :param input_files_hashes: hashes of the input files of the job; when omitted, an empty dict is used
    :return: the job corresponding to those parameters.
    """
    # A literal {} as default value would be one single dict shared by every call, so None is used instead and
    # a fresh dict is created per call.
    if input_files_hashes is None:
        input_files_hashes = {}

    job_id = generate_job_id(job_type, job_params, docker_image_url, input_files_hashes)

    existing_job = DelayedJob.query.filter_by(id=job_id).first()
    if existing_job is not None:
        return existing_job

    run_environment = RUN_CONFIG.get('run_env')

    job = DelayedJob(
        id=job_id,
        type=job_type,
        raw_params=json.dumps(job_params, sort_keys=True),
        docker_image_url=docker_image_url,
        run_environment=run_environment
    )

    DB.session.add(job)
    DB.session.commit()
    return job
def get_lsf_job_ids_to_check(lsf_host):
    """
    Collects the LSF job ids of the jobs whose status must be checked in the LSF cluster. Those are the jobs that:
    1. Were submitted to the LSF host given as parameter
    2. Are not in Error or Finished state.
    3. Belong to the run environment defined in the configuration ('run_env')

    The database session is committed before and after the query.

    :param lsf_host: lsf host for which to return the jobs to check
    :return: a list of LSF job IDs for which it is necessary check the status in the LSF cluster.
    """
    DB.session.commit()

    current_run_environment = RUN_CONFIG.get('run_env')
    jobs_to_check = DelayedJob.query.filter(
        and_(
            DelayedJob.lsf_host == lsf_host,
            DelayedJob.status.notin_([JobStatuses.ERROR, JobStatuses.FINISHED]),
            DelayedJob.run_environment == current_run_environment
        )
    )

    ids = []
    for job in jobs_to_check:
        # Jobs without an LSF id are skipped. This can happen when the server has created a job and is still
        # submitting it at the same time the daemon asks for the jobs to check, and it makes the daemon crash.
        if job.lsf_job_id is None:
            continue
        ids.append(job.lsf_job_id)

    DB.session.commit()
    return ids
def get_config_for_prop(self, index_name, prop_id):
    """
    Builds the configuration of a property by merging its base elasticsearch description with its override
    description. The result is cached per index and property.

    :param index_name: name of the index to which the property belongs
    :param prop_id: full path of the property, such as '_metadata.assay_data.assay_subcellular_fraction'
    :return: a dict describing the configuration of a property
    """
    cache_key = f'config_for_{index_name}-{prop_id}'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_config = cache.fail_proof_get(key=cache_key)
    if cached_config is not None:
        app_logging.debug('results were cached')
        return cached_config

    app_logging.debug('results were not cached')
    app_logging.debug(f'getting property config for {prop_id} of index {index_name}')

    es_description = self.get_property_base_es_description(index_name, prop_id)
    override_description = self.get_property_base_override_description(index_name, prop_id)
    config = self.get_merged_prop_config(index_name, prop_id, es_description, override_description)

    cache.fail_proof_set(key=cache_key, value=config, timeout=RUN_CONFIG.get('es_proxy_cache_seconds'))
    return config
def get_list_of_configured_properties(self, index_name):
    """
    Reads the facets groups file and collects, without duplicates, the properties configured in all the
    subgroups of all the groups of the index. The result is cached per index.

    :param index_name: the index to check
    :return: a list of all the configured properties among all the groups, in no particular order
    :raises FacetsGroupsConfigurationManagerError: when the index has no configuration in the groups file
    """
    cache_key = f'facets_configured_properties_for_{index_name}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results were cached')
        # Always answer with a list. Entries cached by the previous version of this method were sets, so a
        # cache hit used to return a different type than a cache miss.
        return list(cache_response)

    app_logging.debug('results were not cached')

    with open(self.facets_groups_file_path, 'rt') as groups_file:
        groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

    index_groups = groups_config.get(index_name)
    if index_groups is None:
        raise self.FacetsGroupsConfigurationManagerError(
            f'The index {index_name} does not have a configuration set up!'
        )

    # A set removes the properties that appear in more than one group.
    properties_identified = set()
    for subgroup in index_groups.values():
        for properties_list in subgroup.values():
            properties_identified.update(properties_list)

    configured_properties = list(properties_identified)

    seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
    # The list is what gets cached, so hits and misses return the same type.
    cache.fail_proof_set(key=cache_key, value=configured_properties, timeout=seconds_valid)

    return list(configured_properties)
def test_gets_config_for_a_group_with_default_and_additional_properties(self):
    """
    Checks that the configuration obtained for the 'table' group of the activity index lists, in every sub
    group, the same properties and in the same order as the groups file.
    """
    manager = get_group_configuration_instance()
    index_name = f"{RUN_CONFIG.get('es_index_prefix')}activity"
    group_name = 'table'

    group_config_got = manager.get_config_for_group(index_name, group_name)
    configs_got = group_config_got['properties']

    with open(manager.groups_file_path, 'rt') as groups_config_file:
        groups_must_be = yaml.load(groups_config_file, Loader=yaml.FullLoader)

    group_must_be = groups_must_be[index_name][group_name]

    for sub_group, props_list_must_be in group_must_be.items():
        props_list_got = [conf['prop_id'] for conf in configs_got[sub_group]]
        self.assertTrue(props_list_got == props_list_must_be)
def test_gets_config_for_a_virtual_property(self):
    """
    Checks the configuration obtained for 'trade_names' in the molecule index, a virtual property whose
    'based_on' and 'label' must be the ones written in the override file.
    """
    manager = get_property_configuration_instance()

    with open(manager.override_file_path) as override_file:
        override_config_must_be = yaml.load(override_file, Loader=yaml.FullLoader)

    index_name = f"{RUN_CONFIG.get('es_index_prefix')}molecule"
    prop_id = 'trade_names'

    config_got = manager.get_config_for_prop(index_name, prop_id)
    config_must_be = override_config_must_be[index_name][prop_id]

    self.assertEqual(config_got['prop_id'], prop_id, 'The prop_id was not set up properly!')
    self.assertEqual(config_got['based_on'], config_must_be['based_on'],
                     'The based_on was not set up properly!')
    self.assertEqual(config_got['label'], config_must_be['label'], 'The label was not set up properly!')

    self.assertFalse(config_got['aggregatable'], 'This property should not be aggregatable')
    self.assertEqual(config_got['is_virtual'], True, 'This is a virtual property!')
    self.assertEqual(config_got['is_contextual'], False, 'This is not a contextual property!')
def check_smiles(term_dict: dict):
    """
    Asks the ChEMBL web services for the molecules whose canonical smiles flex-matches the term, following the
    pagination until there are no more pages. If any molecule is found, a reference of type 'smiles' with the
    ChEMBL ids is appended to term_dict['references'].

    This is a best effort check: if the web services answer with an error message nothing is added, and any
    error raised while checking is printed and not propagated.

    :param term_dict: dict describing the term, the keys 'term' and 'references' are used
    """
    ws_base_path = RUN_CONFIG.get('chembl_api').get('ws_url')
    try:
        chembl_ids = []
        quoted_smiles = urllib.parse.quote(term_dict['term'])
        next_url_path = (
            f'{ws_base_path}/molecule.json?molecule_structures__canonical_smiles__flexmatch={quoted_smiles}'
        )

        # The path of the next page comes in the metadata of every page, it is empty after the last one.
        while next_url_path:
            response = requests.get(
                WS_DOMAIN + next_url_path,
                headers={'Accept': 'application/json'},
                timeout=5
            )
            json_response = response.json()
            if 'error_message' in json_response:
                return None

            chembl_ids.extend(molecule_i['molecule_chembl_id'] for molecule_i in json_response['molecules'])
            next_url_path = json_response['page_meta']['next']

        if chembl_ids:
            term_dict['references'].append(
                {
                    'type': 'smiles',
                    'label': 'SMILES',
                    'chembl_ids': get_chembl_id_list_dict(chembl_ids),
                    'include_in_query': True,
                    'chembl_entity': 'compound'
                }
            )
    except Exception:
        # A bare except also swallows KeyboardInterrupt and SystemExit; only ordinary errors are ignored here.
        traceback.print_exc()
def test_parses_the_output_of_bjobs_when_no_jobs_were_found(self):
    """
    Generates mock jobs, then sends to the function a mock output to test that it interprets it accordingly.
    After parsing this output, every job must still have the status it was created with.
    """
    self.create_test_jobs_0()

    sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_0.txt')

    with self.flask_app.app_context():
        daemon.parse_bjobs_output(sample_output)

        statuses = delayed_job_models.JobStatuses
        lsf_host = RUN_CONFIG.get('lsf_submission')['lsf_host']

        # No status should have changed
        for status_must_be in (statuses.CREATED, statuses.QUEUED, statuses.RUNNING, statuses.FINISHED,
                               statuses.ERROR):
            for assigned_host in (lsf_host, 'another_host'):
                job = delayed_job_models.get_job_by_id(f'Job-{assigned_host}-{status_must_be}')
                self.assertEqual(job.status, status_must_be,
                                 msg='The status was modified! This should have not modified the status')
def get_url_shortening(url_hash):
    """
    Looks in the url shortening index for the document whose 'hash' is the one given. The search does not use
    the cache.

    :param url_hash: hash of the url to look for
    :return: the first hit returned by elasticsearch as it comes in the response, or None if there are no hits
    """
    index_name = RUN_CONFIG.get('url_shortening').get('index_name')

    # The hash goes between double quotes in the query string.
    query_string = {
        "query": f'"{url_hash}"',
        "default_field": "hash"
    }
    es_query = {"query": {"query_string": query_string}}

    shortening_response = es_data.get_es_response(index_name, es_query, ignore_cache=True)
    hits = shortening_response['hits']

    total_hits = hits['total']['value']
    app_logging.debug(f'total_hits {total_hits}')

    if total_hits == 0:
        return None

    return hits['hits'][0]
def test_collects_the_urls_for_the_outputs_of_a_finished_job(self):
    """
    Generates some mock jobs and writes 4 output files for the job with lsf id 4 (2 in its output directory and
    2 in a sub directory). Then sends a mock output to the function so it interprets that the job finished.
    The finished job must have 4 output files, and their public urls must be among the expected ones.
    """
    self.create_test_jobs_0()

    sample_output = self.load_sample_file('app/job_status_daemon/test/data/sample_lsf_output_1.txt')

    with self.flask_app.app_context():
        # Prepare the test scenario
        lsf_job_id = 4
        job = delayed_job_models.get_job_by_lsf_id(lsf_job_id)

        # The base path only adds a segment to the urls when it is not empty.
        server_base_path = RUN_CONFIG.get('base_path', '')
        server_base_path_with_slash = '' if server_base_path == '' else f'{server_base_path}/'
        outputs_base_path = RUN_CONFIG.get('outputs_base_path')

        output_urls_must_be = []
        for i in range(0, 2):
            out_file_name = f'output_{i}.txt'
            for subdir in ['', 'subdir/']:
                out_file_path = f'{job.output_dir_path}/{subdir}{out_file_name}'
                os.makedirs(Path(out_file_path).parent, exist_ok=True)
                with open(out_file_path, 'wt') as out_file:
                    out_file.write(f'This is output file {i}')

                output_urls_must_be.append(
                    f'/{server_base_path_with_slash}{outputs_base_path}/'
                    f'{job.id}/{subdir}{out_file_name}'
                )
        # END to prepare the test scenario

        daemon.parse_bjobs_output(sample_output)

        job_outputs_got = job.output_files
        self.assertEqual(len(job_outputs_got), 4, msg='There must be 4 outputs for this job!')

        for output_file in job.output_files:
            self.assertIn(output_file.public_url, output_urls_must_be,
                          msg='The output url was not set correctly')
def create_app():
    """
    Creates and configures the flask app: static route for the job outputs, database settings, secret key,
    optional CORS, the database, cache and rate limiter extensions, and all the blueprints under the configured
    base path.

    :return: Delayed jobs flask app
    """
    base_path = RUN_CONFIG.get('base_path', '')
    outputs_base_path = RUN_CONFIG.get('outputs_base_path', 'outputs')
    sql_alchemy_config = RUN_CONFIG.get('sql_alchemy')

    flask_app = Flask(
        __name__,
        static_url_path=f'{base_path}/{outputs_base_path}',
        static_folder=job_submission_service.JOBS_OUTPUT_DIR
    )

    # flask_app.config['SERVER_NAME'] = RUN_CONFIG.get('server_public_host')
    flask_app.config['SQLALCHEMY_DATABASE_URI'] = sql_alchemy_config.get('database_uri')
    flask_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = sql_alchemy_config.get('track_modifications')
    flask_app.config['SECRET_KEY'] = RUN_CONFIG.get('server_secret_key')

    if RUN_CONFIG.get('enable_cors', False):
        CORS(flask_app)

    with flask_app.app_context():
        for extension in (DB, CACHE, RATE_LIMITER):
            extension.init_app(flask_app)

        # The rate limiter logs through the same handlers as the app.
        for handler in flask_app.logger.handlers:
            RATE_LIMITER.logger.addHandler(handler)

        if sql_alchemy_config.get('create_tables', False):
            DB.create_all()

        if RUN_CONFIG.get('generate_default_config', False):
            delayed_job_models.generate_default_job_configs()

    # Blueprints with the path where each one is mounted, relative to the base path. The two admin blueprints
    # share the same prefix.
    blueprint_mounts = (
        (SWAGGER_BLUEPRINT, 'swagger'),
        (SUBMISSION_BLUEPRINT, 'submit'),
        (JOB_STATUS_BLUEPRINT, 'status'),
        (ADMIN_AUTH_BLUEPRINT, 'admin'),
        (ADMIN_TASKS_BLUEPRINT, 'admin'),
        (CUSTOM_STATISTICS_BLUEPRINT, 'custom_statistics'),
    )
    for blueprint, mount_point in blueprint_mounts:
        flask_app.register_blueprint(blueprint, url_prefix=f'{base_path}/{mount_point}')

    return flask_app