def get_simplified_property_mapping(index_name, property_id):
    """
    :param index_name: index name where the property belongs
    :param property_id: id of the property to check
    :return: a dict with the simplified mapping of the property in the given index
    """
    app_logging.debug(f'Getting mapping of {property_id} in {index_name}')
    index_mapping = get_index_mapping(index_name)
    property_accessor = property_id.replace('.', '.properties.')
    property_raw_mapping = dict_property_access.get_property_value(index_mapping, property_accessor)

    if property_raw_mapping is None:
        return None

    label, label_mini = utils.get_labels_from_property_name(index_name, property_id)

    simplified_mapping = {
        'type': get_simplified_property_type(property_raw_mapping),
        'aggregatable': get_simplified_property_aggregatability(property_raw_mapping),
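        # note: sortability is derived from the same aggregatability check of the raw mapping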
        'sortable': get_simplified_property_aggregatability(property_raw_mapping),
        'label': label,
        'label_mini': label_mini
    }

    return simplified_mapping
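A minimal usage sketch; the index and property names below are only illustrative of the expected call shape:

simplified = get_simplified_property_mapping('chembl_molecule', 'molecule_properties.full_mwt')
if simplified is not None:
    print(simplified['type'], simplified['aggregatable'], simplified['label'])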
def get_es_data(index_name, raw_es_query, raw_context, raw_contextual_sort_data):
    """
    :param index_name: name of the index to query
    :param raw_es_query: stringified version of the query to send to Elasticsearch
    :param raw_context: stringified version of a JSON object describing the context of the query
    :param raw_contextual_sort_data: description of the sorting when sorting by contextual properties
    :return: the JSON response from Elasticsearch, plus metadata when a context is provided
    """
    if raw_context is None:
        app_logging.debug('No context detected')
        es_query = json.loads(raw_es_query)
        es_response = es_data.get_es_response(index_name, es_query)
        response = {
            'es_response': es_response,
        }
        return response

    app_logging.debug(f'Using context: {raw_context}')
    es_response, metadata = get_items_with_context(index_name, raw_es_query, raw_context, raw_contextual_sort_data)

    response = {
        'es_response': es_response,
        'metadata': metadata
    }
    return response
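A hedged usage sketch of the service function above, assuming no context is provided (the index name and query are illustrative):

raw_query = json.dumps({'query': {'match_all': {}}, 'size': 10})
result = get_es_data('chembl_molecule', raw_query, None, None)
hits = result['es_response']['hits']['hits']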
def save_shortened_url(long_url, url_hash):
    """
    Saves the shortened URL to Elasticsearch
    :param long_url: full url to save
    :param url_hash: hash of the url
    """

    now = datetime.utcnow()
    time_delta = timedelta(
        days=RUN_CONFIG.get('url_shortening').get('days_valid'))
    expiration_date = now + time_delta
    expires = expiration_date.timestamp() * 1000

    index_name = RUN_CONFIG.get('url_shortening').get('index_name')

    document = {
        'long_url': long_url,
        'hash': url_hash,
        'expires': expires,
        'creation_date_2': int(now.timestamp() * 1000)
    }

    dry_run = RUN_CONFIG.get('url_shortening').get('dry_run')
    if dry_run:
        app_logging.debug(
            f'Dry run is true, not saving the document {document} to the index {index_name}'
        )
    else:
        es_data.save_es_doc(index_name, document, refresh='wait_for')

    return expiration_date
def get_url_shortening(url_hash):
    """
    :param url_hash: hash of the url to look for
    :return: the URL-shortening document from Elasticsearch, or None if the hash is not found
    """

    index_name = RUN_CONFIG.get('url_shortening').get('index_name')
    es_query = {
        "query": {
            "query_string": {
                "query": f'"{url_hash}"',
                "default_field": "hash"
            }
        }
    }

    shortening_response = es_data.get_es_response(index_name,
                                                  es_query,
                                                  ignore_cache=True)
    total_hits = shortening_response['hits']['total']['value']
    app_logging.debug(f'total_hits {total_hits}')

    if total_hits == 0:
        return None

    raw_document = shortening_response['hits']['hits'][0]
    return raw_document
    def get_merged_prop_config(self, index_name, prop_id,
                               es_property_description,
                               property_override_description):
        """
        :param index_name: name of the index to which the property belongs
        :param prop_id: full path of the property, such as  '_metadata.assay_data.assay_subcellular_fraction'
        :param es_property_description: dict describing the property taken from es
        :param property_override_description: dict describing the property taken from the override
        :return: the merged configuration between what was found in es and the override config
        """
        found_in_es = es_property_description is not None
        app_logging.debug(
            f'Property {prop_id} of index {index_name} found_in_es: {found_in_es}'
        )
        found_in_override = property_override_description is not None
        app_logging.debug(
            f'Property {prop_id} of index {index_name} found_in_override: {found_in_override}'
        )

        if not found_in_es and not found_in_override:
            raise self.PropertiesConfigurationManagerError(
                f'The property {prop_id} of index {index_name} does not exist in Elasticsearch or as a virtual property'
            )

        is_virtual = not found_in_es and found_in_override
        app_logging.debug(
            f'Property {prop_id} of index {index_name} is_virtual: {is_virtual}'
        )

        if not is_virtual:
            return {
                'index_name': index_name,
                'prop_id': prop_id,
                **es_property_description,
                **(property_override_description if property_override_description is not None else {})
            }

        base_config = {
            'index_name': index_name,
            'prop_id': prop_id,
            'is_virtual': True
        }

        based_on = property_override_description.get('based_on')

        is_contextual = based_on is None
        app_logging.debug(
            f'Property {prop_id} of index {index_name} is_contextual: {is_contextual}'
        )

        if not is_contextual:
            app_logging.debug(
                f'Property {prop_id} of index {index_name} based_on: {based_on}'
            )
            return self.get_virtual_non_contextual_property_config(
                base_config, based_on, property_override_description)

        return self.get_virtual_contextual_property_config(
            base_config, property_override_description)
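For a non-virtual property the merge above is plain dict unpacking, so override keys win over the Elasticsearch description. A small illustration with made-up values:

es_property_description = {'type': 'double', 'aggregatable': True, 'label': 'MW'}
property_override_description = {'label': 'Molecular Weight'}
merged = {'index_name': 'chembl_molecule', 'prop_id': 'full_mwt',
          **es_property_description, **property_override_description}
# merged['label'] == 'Molecular Weight': the override value replaces the es one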
def update_es_doc(index_name, updated_fields, doc_id):
    """
    Updates the document identified by doc_id with the fields provided
    :param doc_id: id of the document to update
    :param index_name: index containing the document
    :param updated_fields: updated fields of the document to save
    """
    app_logging.debug(f'Updating the document with id {doc_id} with this {updated_fields} on the index {index_name}')
    result = ES.update(index=index_name, body=updated_fields, doc_type='_doc', id=doc_id)
    app_logging.debug(f'Result {result}')
def save_es_doc(index_name, document, **kwargs):
    """
    Saves the document to Elasticsearch in the index indicated by parameter
    :param index_name: index in which to save the document
    :param document: document to save
    :param kwargs: keyword args to pass to the index function:
    https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.Elasticsearch.index
    """
    app_logging.debug(f'Saving the document {document} to the index {index_name}')
    result = ES.index(index=index_name, body=document, doc_type='_doc', **kwargs)
    app_logging.debug(f'Result {result}')
def parse_args_and_submit_job(job_type, form_args, file_args):
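    """
    Parses the form and file arguments received and submits the corresponding job
    :param job_type: type of job to submit
    :param form_args: form parameters received with the request
    :param file_args: file parameters received with the request
    :return: the response obtained from submit_job
    """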
    app_logging.debug(f'args received: {json.dumps(form_args)}')
    docker_image_url = delayed_job_models.get_docker_image_url(job_type)
    job_params_only = {
        param_key: parameter
        for (param_key, parameter) in form_args.items()
    }
    job_inputs_only = get_job_input_files_desc(file_args)
    input_files_hashes = get_input_files_hashes(job_inputs_only)

    return submit_job(job_type, job_inputs_only, input_files_hashes,
                      docker_image_url, job_params_only)
    def get_config_for_prop(self, index_name, prop_id):
        """
        :param index_name: name of the index to which the property belongs
        :param prop_id: full path of the property, such as  '_metadata.assay_data.assay_subcellular_fraction'
        :return: a dict describing the configuration of a property
        """
        cache_key = f'config_for_{index_name}-{prop_id}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug(f'results were cached')
            return cache_response

        app_logging.debug(f'results were not cached')

        app_logging.debug(
            f'getting property config for {prop_id} of index {index_name}')
        es_property_description = self.get_property_base_es_description(
            index_name, prop_id)
        property_override_description = self.get_property_base_override_description(
            index_name, prop_id)
        config = self.get_merged_prop_config(index_name, prop_id,
                                             es_property_description,
                                             property_override_description)

        seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=config,
                             timeout=seconds_valid)
        return config
def get_es_data():
    """
    :return: the json response with the data from elasticsearch
    """
    form_data = request.form

    index_name = sanitise_parameter(form_data.get('index_name'))
    raw_es_query = sanitise_parameter(form_data.get('es_query'))
    raw_context = sanitise_parameter(form_data.get('context_obj'))
    raw_contextual_sort_data = sanitise_parameter(
        form_data.get('contextual_sort_data'))

    app_logging.debug(f'index_name: {index_name}')
    app_logging.debug(f'raw_es_query: {raw_es_query}')
    app_logging.debug(f'raw_context: {raw_context}')
    app_logging.debug(f'raw_contextual_sort_data: {raw_contextual_sort_data}')

    try:

        json_response = es_proxy_service.get_es_data(index_name, raw_es_query,
                                                     raw_context,
                                                     raw_contextual_sort_data)

        http_response = jsonify(json_response)
        http_cache_utils.add_cache_headers_to_response(http_response)
        return http_response

    except es_proxy_service.ESProxyServiceError as error:

        abort(500, f'Internal server error: {str(error)}')
def prepare_output_dir(job):
    """
    Makes sure to create the output dir for the job
    :param job: job object for which to create the output directory
    """

    job_output_dir = get_job_output_dir_path(job)

    if os.path.exists(job_output_dir):
        utils.delete_directory_robustly(job_output_dir)

    job.output_dir_path = job_output_dir
    delayed_job_models.save_job(job)
    os.makedirs(job_output_dir, exist_ok=True)

    app_logging.debug(f'Job output dir is {job_output_dir}')
def create_job_run_dir(job):
    """
    Creates the directory where the job will run
    :param job: job object for which to create the job run directory
    """
    job_run_dir = get_job_run_dir(job)
    job_input_files_dir = get_job_input_files_dir(job)

    if os.path.exists(job_run_dir):
        utils.delete_directory_robustly(job_run_dir)

    job.run_dir_path = job_run_dir
    delayed_job_models.save_job(job)
    os.makedirs(job_run_dir, exist_ok=True)
    os.makedirs(job_input_files_dir, exist_ok=True)

    app_logging.debug(f'Job run dir is {job_run_dir}')
def get_classification_tree():
    """
    :return: the GO slim target classification tree
    """

    cache_key = 'target_classifications_go_slim_1'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)

    if cache_response is not None:
        app_logging.debug('results are in cache')
        return cache_response

    tree_generator = GoSlimTreeGenerator()
    final_tree = tree_generator.get_classification_tree()

    cache_time = int(3.154e7)
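    # 3.154e7 seconds is roughly one year, so the tree stays cached for about a year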
    cache.fail_proof_set(key=cache_key, value=final_tree, timeout=cache_time)

    return final_tree
def create_and_submit_job(job_type, input_files_desc, input_files_hashes,
                          docker_image_url, job_params):
    """
    Creates a job and submits it to LSF
    :param job_type: type of job to submit
    :param input_files_desc: dict with the paths of the input files
    :param input_files_hashes: dict with the hashes of the input files
    :param docker_image_url: image of the container to use
    :param job_params: parameters of the job
    :return: the job object created
    """
    job = delayed_job_models.get_or_create(job_type, job_params,
                                           docker_image_url,
                                           input_files_hashes)
    job.progress = 0
    job.started_at = None
    job.finished_at = None
    delayed_job_models.save_job(job)
    app_logging.debug(f'Submitting Job: {job.id}')
    prepare_job_and_submit(job, input_files_desc)
    return job
def do_multisearch(body):
    """
    :param body: body of the multisearch
    :return: the result of the multisearch
    """
    cache_key = get_multisearch_cache_key(body)
    app_logging.debug(f'cache_key: {cache_key}')

    start_time = time.time()
    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        end_time = time.time()
        time_taken = end_time - start_time
        app_logging.debug(f'results were cached')
        record_that_response_was_cached('multisearch', {'query': body}, time_taken)
        return cache_response

    app_logging.debug(f'results were not cached')

    start_time = time.time()
    result = ES.msearch(body=body)
    end_time = time.time()
    time_taken = end_time - start_time

    record_that_response_not_cached('multisearch', {'query': body}, time_taken)

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=result, timeout=seconds_valid)

    return result
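A hedged sketch of a multisearch body for do_multisearch; the Python Elasticsearch client accepts a sequence of alternating header and query dicts (index names and queries are illustrative):

multisearch_body = [
    {'index': 'chembl_molecule'},
    {'query': {'match_all': {}}, 'size': 5},
    {'index': 'chembl_target'},
    {'query': {'match_all': {}}, 'size': 5},
]
results = do_multisearch(multisearch_body)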
def parse_search(search_term, es_indexes, selected_es_index):
    """
    :param search_term: Term to parse
    :param es_indexes: indexes in which the search will be done, separated by a comma
    :param selected_es_index: es index to focus on
    :return: the query to send to elasticsearch based on the search term provided
    """

    cache_key = f'{search_term}-{es_indexes}-{selected_es_index}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug(f'results were cached')
        return cache_response

    app_logging.debug(f'results were not cached')

    parsed_query = parse_query_str(search_term)
    indexes_list = es_indexes.split(',')
    best_queries, sorted_indexes_by_score = QueryBuilder.get_best_es_query(
        parsed_query, indexes_list, selected_es_index)

    response_dict = {
        'parsed_query': parsed_query,
        'best_es_base_queries': best_queries,
        'sorted_indexes_by_score': sorted_indexes_by_score
    }

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key,
                         value=response_dict,
                         timeout=seconds_valid)

    return response_dict
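A usage sketch; the search term and index names are hypothetical:

search_config = parse_search('aspirin', 'chembl_molecule,chembl_target', 'chembl_molecule')
best_queries = search_config['best_es_base_queries']
sorted_indexes = search_config['sorted_indexes_by_score']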
    def get_list_of_configured_properties(self, index_name):
        """
        :param index_name: the index to check
        :return: a list of all the configured properties among all the groups
        """

        cache_key = f'facets_configured_properties_for_{index_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug(f'results were cached')
            return cache_response

        app_logging.debug(f'results were not cached')

        with open(self.facets_groups_file_path, 'rt') as groups_file:

            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)
            index_groups = groups_config.get(index_name)
            if index_groups is None:
                raise self.FacetsGroupsConfigurationManagerError(
                    f'The index {index_name} does not have a configuration set up!'
                )
            properties_identified = set()
            for subgroup in index_groups.values():
                for properties_list in subgroup.values():
                    properties_identified.update(properties_list)

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=properties_identified,
                             timeout=seconds_valid)
        return list(properties_identified)
def get_context(context_dict):
    """
    Loads the context described by the context dict
    :param context_dict: dictionary describing the context
    :return: a tuple with the context results (truncated to WEB_RESULTS_SIZE_LIMIT) and the total number of results
    """
    context_url = get_context_url(context_dict)
    app_logging.debug(f'Loading context from url: {context_url}')
    context_request = requests.get(context_url)

    if context_request.status_code != 200:
        raise ContextLoaderError(
            'There was an error while loading the context: ' +
            context_request.text)

    results = context_request.json()['search_results']

    total_results = len(results)
    if total_results > WEB_RESULTS_SIZE_LIMIT:
        results = results[0:WEB_RESULTS_SIZE_LIMIT]

    return results, total_results
def expand_url(url_hash):
    """
    :param url_hash: hash of the url to expand
    :return: the expanded url corresponding to the hash
    """
    raw_document = get_url_shortening(url_hash)
    if raw_document is None:
        raise URLNotFoundError(f'No URL corresponds to the hash {url_hash}')

    keep_alive = RUN_CONFIG.get('url_shortening').get('keep_alive', False)

    if not keep_alive:
        expiration_timestamp = raw_document['_source']['expires']
        expires = datetime.fromtimestamp(expiration_timestamp / 1000)
    else:
        expires = extend_expiration_date(raw_document)
        app_logging.debug(
            f'keep_alive is on, new expiration date is {expires}')

    long_url = raw_document['_source']['long_url']
    statistics_saver.record_url_was_expanded()
    trigger_deletion_of_expired_urls()
    return {'long_url': long_url, 'expires': expires}
    def get_facets_config_for_group(self, index_name, group_name):
        """
        :param index_name: name of the index to which the group belongs
        :param group_name: name of the facets group
        :return: the configuration for the facets group
        """

        cache_key = f'facets_config_for_group_{index_name}-{group_name}_3'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug(f'results were cached')
            return cache_response

        app_logging.debug(f'results were not cached')

        with open(self.facets_groups_file_path, 'rt') as groups_file:

            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

            index_groups = groups_config.get(index_name)
            if index_groups is None:
                raise self.FacetsGroupsConfigurationManagerError(
                    f'The index {index_name} does not have a configuration set up!'
                )

            group_config = index_groups.get(group_name)
            if group_config is None:
                raise self.FacetsGroupsConfigurationManagerError(
                    f'The group {group_name} does not exist in index {index_name}!'
                )

            default_properties = group_config.get('default', {})
            optional_properties = group_config.get('optional', {})

        config = {
            'properties': {
                'default':
                self.get_facets_config_for_properties(default_properties,
                                                      index_name),
                'optional':
                self.get_facets_config_for_properties(optional_properties,
                                                      index_name)
            }
        }

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=config,
                             timeout=seconds_valid)
        return config
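The facets groups file itself is not shown here; from the way it is read above, its parsed structure is presumably something like the dict below, with per-property values in whatever form get_facets_config_for_properties expects (all names are made up):

facets_groups_config_example = {
    'chembl_molecule': {
        'browser_facets': {
            'default': {'molecule_type': {}},
            'optional': {'max_phase': {}},
        }
    }
}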
def save_record_to_elasticsearch(doc, index_name):
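    """
    Saves the record given as parameter to the statistics index given as parameter
    :param doc: doc to save
    :param index_name: index where to save the doc
    """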

    dry_run = RUN_CONFIG.get('job_statistics', {}).get('dry_run', False)
    es_host = RUN_CONFIG.get('elasticsearch', {}).get('host')

    if dry_run:
        app_logging.debug(
            f'Not actually sending the record to the statistics (dry run): {doc}'
        )
    else:
        app_logging.debug(
            f'Sending the following record to the statistics: {doc} '
            f'index name: {index_name} es_host: {es_host}')
        result = ES.index(index=index_name, body=doc, doc_type='_doc')
        app_logging.debug(f'Result {result}')
def shorten_url(long_url):
    """
    :param long_url: URL to shorten
    :return: a dict with the hash and its expiration date
    """
    digest = hashlib.md5(long_url.encode('utf-8')).digest()

    # replace / and + to avoid routing problems
    url_hash = base64.b64encode(digest).decode('utf-8').replace(
        '/', '_').replace('+', '-')
    app_logging.debug(f'url_hash: {url_hash}')

    # check if the url has been shortened before
    raw_document = get_url_shortening(url_hash)
    already_exists = raw_document is not None

    if already_exists:

        app_logging.debug(f'already exists')

        keep_alive = RUN_CONFIG.get('url_shortening').get('keep_alive', False)
        if not keep_alive:
            expiration_timestamp = raw_document['_source']['expires']
            expires = datetime.fromtimestamp(expiration_timestamp / 1000)
        else:
            expires = extend_expiration_date(raw_document)
            app_logging.debug(
                f'keep_alive is on, new expiration date is {expires}')

    else:

        app_logging.debug(f'Did not exist before')

        expires = save_shortened_url(long_url, url_hash)

    statistics_saver.record_url_was_shortened()
    trigger_deletion_of_expired_urls()
    return {'hash': url_hash, 'expires': expires}
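A round-trip usage sketch, assuming the URL-shortening index is reachable (the URL is illustrative):

original_url = 'https://www.ebi.ac.uk/chembl/g/#browse/compounds'
shortening = shorten_url(original_url)
expansion = expand_url(shortening['hash'])
assert expansion['long_url'] == original_url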
    def get_config_for_group(self, index_name, group_name):
        """
        :param index_name: name of the index
        :param group_name: group name as defined in the groups file
        :return: the configuration of the group with the following structure:
        {
            "properties": {
                "default": [...], # properties to show by default
                "optional:" [...] # properties to show as optional for the user
            }
        }
        """

        cache_key = f'config_for_group_{index_name}-{group_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug(f'results were cached')
            return cache_response

        app_logging.debug(f'results were not cached')

        with open(self.groups_file_path, 'rt') as groups_file:

            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

            index_groups = groups_config.get(index_name, {})
            group_config = index_groups.get(group_name)
            if group_config is None:
                raise self.GroupsConfigurationManagerError(
                    f'The group {group_name} does not exist in index {index_name}!'
                )

            props_configs = {}

            for sub_group, props_list in group_config.items():
                props_configs[sub_group] = self.get_config_for_props_list(
                    index_name, props_list)

            config = {'properties': props_configs}

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=config,
                             timeout=seconds_valid)

        return config
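The groups file read above is not included here; based on how it is consumed, its parsed structure should look roughly like this (index, group, and property names are illustrative):

groups_config_example = {
    'chembl_molecule': {
        'browser_table': {
            'default': ['molecule_chembl_id', 'pref_name'],
            'optional': ['molecule_type'],
        }
    }
}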
def get_es_response(index_name, es_query, ignore_cache=False):
    """""
    :param index_name: name of the index to query against
    :param es_query: dict with the query to send
    :param ignore_cache: determines if cache must be ignored or not
    :return: the dict with the response from es
    """

    cache_key = get_es_query_cache_key(index_name, es_query)
    app_logging.debug(f'cache_key: {cache_key}')

    start_time = time.time()

    if not ignore_cache:

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            end_time = time.time()
            time_taken = end_time - start_time
            app_logging.debug(f'results were cached')
            record_that_response_was_cached(index_name, es_query, time_taken)
            return cache_response

    app_logging.debug(f'results were not cached')

    try:

        start_time = time.time()
        response = ES.search(index=index_name, body=es_query)
        end_time = time.time()
        time_taken = end_time - start_time

        record_that_response_not_cached(index_name, es_query, time_taken)

    except elasticsearch.exceptions.RequestError as error:
        app_logging.error('This query caused an error:')
        app_logging.error(f'index_name: {index_name}')
        app_logging.error('es_query:')
        app_logging.error(es_query)
        raise error

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')

    if not ignore_cache:
        cache.fail_proof_set(key=cache_key, value=response, timeout=seconds_valid)

    return response
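A usage sketch for the data-access function above (index name and query are illustrative):

count_query = {'query': {'match_all': {}}, 'size': 0}
es_response = get_es_response('chembl_molecule', count_query)
total_items = es_response['hits']['total']['value']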
def get_job_resources_params(job):
    """
    Builds the bsub parameters string for the job's resource requirements
    :param job: job object for which to generate the parameters string
    :return: the parameters string, or an empty string if the default settings must be used
    """
    job_config = delayed_job_models.get_job_config(job.type)
    source_requirements_script_path = job_config.requirements_script_path

    if source_requirements_script_path is None:
        return ''

    dest_requirements_script_path = Path(
        job.run_dir_path).joinpath('requirements_calculation.py')
    shutil.copyfile(source_requirements_script_path,
                    dest_requirements_script_path)

    file_stats = os.stat(source_requirements_script_path)
    os.chmod(dest_requirements_script_path, file_stats.st_mode | stat.S_IEXEC)

    run_params_path = get_job_run_params_file_path(job)
    run_command = f'{dest_requirements_script_path} {run_params_path}'

    requirements_params_process = subprocess.run(run_command.split(' '),
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE)

    return_code = requirements_params_process.returncode
    app_logging.debug(f'requirements return code was: {return_code}')
    if return_code != 0:
        raise JobSubmissionError(
            'There was an error when running the job submission script! Please check the logs'
        )

    app_logging.debug(
        f'Run params Output: \n {requirements_params_process.stdout}')
    app_logging.debug(
        f'Run params Error: \n {requirements_params_process.stderr}')

    params_output_str = requirements_params_process.stdout.decode().rstrip()
    if params_output_str == 'DEFAULT':
        return ''

    return params_output_str
def get_es_doc(index_name, doc_id):
    """
    :param index_name: name of the index to which the document belongs
    :param doc_id: id of the document
    :return: the dict with the response from es corresponding to the document
    """

    cache_key = f'document-{doc_id}'
    app_logging.debug(f'cache_key: {cache_key}')

    equivalent_query = {
        "query": {
            "ids": {
                "values": doc_id
            }
        }
    }

    start_time = time.time()
    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        end_time = time.time()
        time_taken = end_time - start_time
        app_logging.debug(f'results were cached')
        record_that_response_was_cached(index_name, equivalent_query, time_taken)
        return cache_response

    app_logging.debug(f'results were not cached')

    try:
        start_time = time.time()
        response = ES.get(index=index_name, id=doc_id)
        end_time = time.time()
        time_taken = end_time - start_time

        record_that_response_not_cached(index_name, equivalent_query, time_taken)
    except elasticsearch.exceptions.NotFoundError as error:
        raise ESDataNotFoundError(repr(error))

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=response, timeout=seconds_valid)

    return response
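A usage sketch; the document id is hypothetical:

try:
    molecule_doc = get_es_doc('chembl_molecule', 'CHEMBL25')
    molecule_source = molecule_doc['_source']
except ESDataNotFoundError:
    molecule_source = None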
    def get_list_of_configured_properties(self, index_name):
        """
        :param index_name: the index to check
        :return: a list of all the configured properties among all the groups
        """
        cache_key = f'configured_properties_for_{index_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug(f'results were cached')
            return cache_response

        app_logging.debug(f'results were not cached')

        with open(self.groups_file_path, 'rt') as groups_file:
            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

            properties_identified = set()
            index_groups = groups_config.get(index_name)
            if index_groups is None:
                raise self.GroupsConfigurationManagerError(
                    f'The index {index_name} does not have a configuration set up!'
                )
            for subgroup in index_groups.values():
                for properties_list in subgroup.values():
                    for property_id in properties_list:
                        property_config = self.property_configuration_manager.get_config_for_prop(
                            index_name, property_id)
                        is_virtual = property_config.get('is_virtual', False)
                        # Do not include virtual properties
                        if is_virtual:
                            continue

                        properties_identified.add(property_id)

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        cache.fail_proof_set(key=cache_key,
                             value=properties_identified,
                             timeout=seconds_valid)

        return list(properties_identified)
def save_record_to_elasticsearch(doc, index_name):
    """
    Saves the record indicated as parameter to the index indicated as parameter
    :param doc: doc to save
    :param index_name: index where to save the doc
    """

    dry_run = RUN_CONFIG.get('usage_statistics', {}).get('dry_run', False)
    es_host = RUN_CONFIG.get('usage_statistics', {}).get('elasticsearch', {}).get('host')
    es_port = RUN_CONFIG.get('usage_statistics', {}).get('elasticsearch', {}).get('port')

    if dry_run:
        app_logging.debug(f'Not actually sending the record to the statistics index {index_name} (dry run): {doc}')

    else:
        app_logging.debug(f'Sending the following record to the statistics: {doc} '
                          f'index name: {index_name} es_host: {es_host}:{es_port}')

        result = ES_MONITORING.index(index=index_name, body=doc, doc_type='_doc')
        app_logging.debug(f'Result {result}')
def submit_job(job_type, input_files_desc, input_files_hashes,
               docker_image_url, job_params):
    """
    Submits the job to the queue and runs it in the background
    :param job_type: type of job to submit
    :param input_files_desc: dict with the paths of the input files
    :param input_files_hashes: dict with the hashes of the input files
    :param docker_image_url: image of the container to use
    :param job_params: dict with the job parameters
    """

    try:

        # See if the job already exists
        job = delayed_job_models.get_job_by_params(job_type, job_params,
                                                   docker_image_url,
                                                   input_files_hashes)

        # If it exists, execution continues here; if not, a JobNotFoundError is raised
        # and the job is created and submitted in the except block below
        statistics_saver.save_job_cache_record(
            job_type=str(job_type),
            run_env_type=RUN_CONFIG.get('run_env'),
            was_cached=True,
            request_date=datetime.utcnow().timestamp() * 1000)

        app_logging.debug(f'Job {job.id} already exists, status: {job.status}')

        if job.status in [
                delayed_job_models.JobStatuses.CREATED,
                delayed_job_models.JobStatuses.QUEUED,
                delayed_job_models.JobStatuses.RUNNING,
                delayed_job_models.JobStatuses.UNKNOWN
        ]:

            return get_job_submission_response(job)

        elif job.status == delayed_job_models.JobStatuses.ERROR:

            if job.num_failures <= MAX_RETRIES:
                app_logging.debug(
                    f'{job.id} has failed {job.num_failures} times. Max retries is {MAX_RETRIES}. '
                    f'I will submit it again')
                job = create_and_submit_job(job_type, input_files_desc,
                                            input_files_hashes,
                                            docker_image_url, job_params)
                return get_job_submission_response(job)
            else:
                app_logging.debug(
                    f'{job.id} has failed {job.num_failures} times. Max retries is {MAX_RETRIES}. '
                    f'NOT submitting it again')
                return get_job_submission_response(job)

        elif job.status == delayed_job_models.JobStatuses.FINISHED:

            must_ignore_cache = parse_ignore_cache_param(job_params)
            output_was_lost = job_output_was_lost(job)

            app_logging.debug(
                f'{job.id}: must_ignore_cache: {must_ignore_cache}')
            app_logging.debug(f'{job.id}: output_was_lost: {output_was_lost}')

            must_resubmit = must_ignore_cache or output_was_lost
            app_logging.debug(f'{job.id}: must_resubmit: {must_resubmit}')

            if must_resubmit:
                app_logging.debug(f'I will delete and submit again {job.id}')
                delayed_job_models.delete_job(job)
                job = create_and_submit_job(job_type, input_files_desc,
                                            input_files_hashes,
                                            docker_image_url, job_params)
                return get_job_submission_response(job)

        return get_job_submission_response(job)

    except delayed_job_models.JobNotFoundError:

        job = create_and_submit_job(job_type, input_files_desc,
                                    input_files_hashes, docker_image_url,
                                    job_params)
        statistics_saver.save_job_cache_record(
            job_type=str(job_type),
            run_env_type=RUN_CONFIG.get('run_env'),
            was_cached=False,
            request_date=datetime.utcnow().timestamp() * 1000)
        return get_job_submission_response(job)
def submit_job_to_lsf(job):
    """
    Runs a script that submits the job to LSF
    :param job: DelayedJob object
    """
    submit_file_path = get_job_submission_script_file_path(job)
    submission_output_path = Path(submit_file_path).parent.joinpath(
        'submission.out')
    submission_error_path = Path(submit_file_path).parent.joinpath(
        'submission.err')

    lsf_config = RUN_CONFIG.get('lsf_submission')
    id_rsa_path = lsf_config['id_rsa_file']

    run_command = f'{submit_file_path} {id_rsa_path}'
    app_logging.debug(
        f'Going to run job submission script, command: {run_command}')

    must_run_jobs = RUN_CONFIG.get('run_jobs', True)
    if not must_run_jobs:
        app_logging.debug(f'Not submitting jobs because run_jobs is False')
        return

    submission_process = subprocess.run(run_command.split(' '),
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)

    app_logging.debug(f'Submission STD Output: \n {submission_process.stdout}')
    app_logging.debug(f'Submission STD Error: \n {submission_process.stderr}')

    with open(submission_output_path, 'wb') as submission_out_file:
        submission_out_file.write(submission_process.stdout)

    with open(submission_error_path, 'wb') as submission_err_file:
        submission_err_file.write(submission_process.stderr)

    return_code = submission_process.returncode
    app_logging.debug(f'submission return code was: {return_code}')
    if return_code != 0:
        raise JobSubmissionError(
            'There was an error when running the job submission script! Please check the logs'
        )

    lsf_job_id = get_lsf_job_id(str(submission_process.stdout))
    job.lsf_job_id = lsf_job_id
    job.status = delayed_job_models.JobStatuses.QUEUED
    delayed_job_models.save_job(job)
    app_logging.debug(f'LSF Job ID is: {lsf_job_id}')