def make_filter_label_dict(self, act_val):
    """ Builds the label dictionary for a filter value.

    act_val may contain several alternatives separated by '||' (an OR
    search). Every alternative is passed through
    QueryMaker.clean_related_slug and the result is looked up with
    self.m_cache.get_entity.

    Returns a dict with the keys (in this order):
      'label'     -- labels of all alternatives joined with ' OR ',
                     followed by ' (for related items)' when
                     clean_related_slug changed at least one alternative
      'data-type' -- 'id', unless a found entity maps to a different
                     solr field type (the last such entity wins)
      'slug'      -- the raw alternatives joined with '-or-'
      'entities'  -- the entity objects that were found
    """
    alternatives = act_val.split('||') if '||' in act_val else [act_val]
    suffix = ''
    data_type = 'id'
    names = []
    found = []
    for raw in alternatives:
        qm = QueryMaker()
        cleaned = qm.clean_related_slug(raw)
        if raw != cleaned:
            suffix = ' (for related items)'
        entity = self.m_cache.get_entity(cleaned)
        if not entity:
            # nothing known about this slug, show the raw value instead
            names.append(raw)
            continue
        # get the solr field data type
        solr_type = qm.get_solr_field_type(entity.data_type)
        if not (solr_type is False or solr_type == 'id'):
            data_type = solr_type
        names.append(entity.label)
        found.append(entity)
    return {
        'label': ' OR '.join(names) + suffix,
        'data-type': data_type,
        'slug': '-or-'.join(alternatives),
        'entities': found,
    }
def get_request_param(self, request_dict, param, default, as_list=False, solr_escape=False):
    """ Gets a string or list to use in queries.

    The value comes from either the request object or the
    internal_request object, so searches can be made without having
    to go through HTTP.

    request_dict -- dict of request parameters, or False
    param        -- key to look up in request_dict
    default      -- returned when param is absent, or when a scalar is
                    requested and the parameter holds an empty list
    as_list      -- when True the result is a list; a list value is
                    returned unchanged, a scalar is wrapped in a list
    solr_escape  -- as_list: wraps a scalar in double quotes.
                    scalar: escapes the value with
                    QueryMaker.escape_solr_arg, keeping surrounding
                    double quotes around the escaped text

    Returns False when request_dict is False.
    """
    if request_dict is False:
        return False
    if param not in request_dict:
        return default
    value = request_dict[param]
    if as_list:
        if isinstance(value, list):
            return value
        if solr_escape:
            value = '"' + value + '"'
        return [value]
    if isinstance(value, list):
        if not value:
            # a parameter without any values is treated as not given
            return default
        value = value[0]
    if not solr_escape or value == '':
        # an empty string has nothing to escape; indexing it below
        # would raise an IndexError
        return value
    qm = QueryMaker()
    if value[0] == '"' and value[-1] == '"':
        # escape only the text between the quotes, then re-quote it
        return '"' + qm.escape_solr_arg(value[1:-1]) + '"'
    return qm.escape_solr_arg(value)
def make_bbox_filter_label(self, raw_disc_bbox):
    """ Parses a raw bbox parameter value to make a filter label.

    raw_disc_bbox may hold several bounding boxes separated by '||'.
    Each box is a comma separated coordinate list that is checked with
    QueryMaker.validate_bbox_coordiantes. Coordinates are read as
    index 0 and 2 for longitude and index 1 and 3 for latitude.

    Returns one label per box, joined with '; or '. A box that has no
    comma or fails validation contributes
    '[Ignored invalid bounding-box]'.
    """
    qm = QueryMaker()
    invalid_label = '[Ignored invalid bounding-box]'
    boxes = raw_disc_bbox.split('||') if '||' in raw_disc_bbox else [raw_disc_bbox]
    labels = []
    for box in boxes:
        if ',' not in box:
            labels.append(invalid_label)
            continue
        coords = box.split(',')
        if not qm.validate_bbox_coordiantes(coords):
            labels.append(invalid_label)
            continue
        labels.append(''.join([
            'In the bounding-box of: Latitude ',
            str(coords[1]),
            ', Longitude ' + str(coords[0]),
            ' (SW) and Latitude ' + str(coords[3]),
            ', Longitude ' + str(coords[2]),
            ' (NE)',
        ]))
    return '; or '.join(labels)
def make_filter_label_dict(self, act_val):
    """ Builds the label dictionary for a filter value.

    act_val may contain several alternatives separated by '||' (an OR
    search). Every alternative is looked up with self.get_entity,
    which signals "not found" by returning False.

    Returns a dict with the keys (in this order):
      'label'     -- entity labels (or the raw value when no entity was
                     found) joined with ' OR '
      'data-type' -- 'id', unless a found entity maps to a different
                     solr field type (the last such entity wins)
      'slug'      -- the raw alternatives joined with '-or-'
      'entities'  -- the entity objects that were found
    """
    alternatives = act_val.split('||') if '||' in act_val else [act_val]
    data_type = 'id'
    names = []
    found = []
    for raw in alternatives:
        entity = self.get_entity(raw)
        if entity is False:
            # nothing known about this slug, show the raw value instead
            names.append(raw)
            continue
        qm = QueryMaker()
        # get the solr field data type
        solr_type = qm.get_solr_field_type(entity.data_type)
        if not (solr_type is False or solr_type == 'id'):
            data_type = solr_type
        names.append(entity.label)
        found.append(entity)
    return {
        'label': ' OR '.join(names),
        'data-type': data_type,
        'slug': '-or-'.join(alternatives),
        'entities': found,
    }
def get_attributes(self, solr_rec):
    """ Gets attributes for a record.

    The attributes are based on the predicates requested in the search
    and optional predicates passed by a client with a GET request with
    parameter 'attributes' (read here from self.rec_attributes).

    First pass: for every requested attribute with a known entity, the
    solr field name is built and self.extract_attribute_children is
    called for it, so the most specific attributes can be found.

    Second pass: after self.clean_attribute_hiearchies(), when
    self.attribute_hierarchies is a dict, self.other_attributes is
    rebuilt as a list of dicts with the keys 'property',
    'values_list' and 'value'.

    Returns None; results are stored on self.
    """
    qm = QueryMaker()
    for attribute in self.rec_attributes:
        entity = self.get_entity(attribute)
        if entity is False:
            continue
        prop_slug = entity.slug
        # check to make sure we have the entity data type for linked fields
        if entity.data_type is False and entity.item_type == 'uri':
            lequiv = LinkEquivalence()
            dtypes = lequiv.get_data_types_from_object(entity.uri)
            if isinstance(dtypes, list):
                # set the data type and the act-field
                entity.data_type = dtypes[0]
                if prop_slug in self.entities:
                    # store entity for later use
                    self.entities[prop_slug] = entity
        field_parts = qm.make_prop_solr_field_parts(entity)
        solr_field = field_parts['prefix'] + '___pred_' + field_parts['suffix']
        # extract children of the solr_field so we know if
        # we have the most specific attributes, then we can get
        # values for the most specific attributes
        self.extract_attribute_children(solr_rec, solr_field)
    self.clean_attribute_hiearchies()
    if not isinstance(self.attribute_hierarchies, dict):
        return
    self.other_attributes = []
    for field_slug_key, values in self.attribute_hierarchies.items():
        entity = self.get_entity(field_slug_key)
        if entity is False:
            continue
        values_list = []
        has_string_val = False
        for val in values:
            if isinstance(val, str):
                # string values are parsed and only their label is kept
                has_string_val = True
                val = self.parse_solr_value_parts(val)['label']
            values_list.append(val)
        attribute_dict = LastUpdatedOrderedDict()
        attribute_dict['property'] = entity.label
        attribute_dict['values_list'] = values_list
        if len(values) == 1 and not has_string_val:
            # a single non-string value keeps its original type
            attribute_dict['value'] = values[0]
        else:
            attribute_dict['value'] = self.ATTRIBUTE_DELIM.join(
                str(val) for val in values_list
            )
        self.other_attributes.append(attribute_dict)
def get_attributes(self, solr_rec):
    """ Gets attributes for a record.

    The attributes are based on the predicates requested in the search
    and optional predicates passed by a client with a GET request with
    parameter 'attributes' (read here from self.rec_attributes).

    First pass: for every requested attribute with a known entity, the
    solr field name is built and self.extract_attribute_children is
    called for it, so the most specific attributes can be found.

    Second pass: after self.clean_attribute_hiearchies(), when
    self.attribute_hierarchies is a dict, self.other_attributes is
    rebuilt as a list of dicts with the keys 'property',
    'values_list' and 'value'.

    Returns None; results are stored on self.
    """
    qm = QueryMaker()
    for attribute in self.rec_attributes:
        entity = self.get_entity(attribute)
        if entity is False:
            continue
        prop_slug = entity.slug
        # check to make sure we have the entity data type for linked fields
        if entity.data_type is False and entity.item_type == 'uri':
            lequiv = LinkEquivalence()
            dtypes = lequiv.get_data_types_from_object(entity.uri)
            if isinstance(dtypes, list):
                # set the data type and the act-field
                entity.data_type = dtypes[0]
                if prop_slug in self.entities:
                    # store entity for later use
                    self.entities[prop_slug] = entity
        field_parts = qm.make_prop_solr_field_parts(entity)
        solr_field = field_parts['prefix'] + '___pred_' + field_parts['suffix']
        # extract children of the solr_field so we know if
        # we have the most specific attributes, then we can get
        # values for the most specific attributes
        self.extract_attribute_children(solr_rec, solr_field)
    self.clean_attribute_hiearchies()
    if not isinstance(self.attribute_hierarchies, dict):
        return
    self.other_attributes = []
    for field_slug_key, values in self.attribute_hierarchies.items():
        entity = self.get_entity(field_slug_key)
        if entity is False:
            continue
        values_list = []
        has_string_val = False
        for val in values:
            if isinstance(val, str):
                # string values are parsed and only their label is kept
                has_string_val = True
                val = self.parse_solr_value_parts(val)['label']
            values_list.append(val)
        attribute_dict = LastUpdatedOrderedDict()
        attribute_dict['property'] = entity.label
        attribute_dict['values_list'] = values_list
        if len(values) == 1 and not has_string_val:
            # a single non-string value keeps its original type
            attribute_dict['value'] = values[0]
        else:
            attribute_dict['value'] = self.ATTRIBUTE_DELIM.join(
                str(val) for val in values_list
            )
        self.other_attributes.append(attribute_dict)
def add_date_fields(self, solr_json):
    """ Adds date fields with range query options.

    Reads the 'date' ranges with self.get_solr_ranges(solr_json, 'date').
    For every ranged solr field a facet description is created with
    self.get_facet_meta and extended with the keys 'oc-api:min-date',
    'oc-api:max-date', 'oc-api:gap-date' and
    'oc-api:has-range-options' (one entry per value/count pair in
    ranges['counts']).

    When at least one field was built and 'facet' is in
    self.act_responses, the list is stored in
    self.json_ld['oc-api:has-date-facets']. Returns None.
    """
    date_facet_ranges = self.get_solr_ranges(solr_json, 'date')
    if date_facet_ranges is False:
        return
    date_fields = []
    for solr_field_key, ranges in date_facet_ranges.items():
        slug = solr_field_key.split('___')[0].replace('_', '-')
        # check to see if the field is a linked data field
        # if so, it needs some help with making Filter Links
        # (the flag is computed but not consumed in this method)
        linked_field = False
        field_entity = self.get_entity(slug)
        if field_entity is not False:
            self.add_active_facet_field(slug)
            if field_entity.item_type == 'uri':
                linked_field = True
        field = self.get_facet_meta(solr_field_key)
        field['oc-api:min-date'] = ranges['start']
        field['oc-api:max-date'] = ranges['end']
        field['oc-api:gap-date'] = ranges['gap']
        options = []
        field['oc-api:has-range-options'] = options
        qm = QueryMaker()
        # ranges['counts'] alternates range start values and their counts
        for pair_index, range_min_key in enumerate(ranges['counts'][::2]):
            solr_count = ranges['counts'][2 * pair_index + 1]
            fl = FilterLinks()
            fl.base_search_link = self.base_search_link
            fl.base_request_json = self.request_dict_json
            fl.base_r_full_path = self.request_full_path
            fl.spatial_context = self.spatial_context
            fl.partial_param_val_match = True
            dt_end = qm.add_solr_gap_to_date(range_min_key, ranges['gap'])
            range_end = qm.convert_date_to_solr_date(dt_end)
            solr_range = '[' + range_min_key + ' TO ' + range_end + ' ]'
            new_rparams = fl.add_to_request('prop', solr_range, slug)
            min_label = qm.make_human_readable_date(range_min_key)
            max_label = qm.make_human_readable_date(range_end)
            range_dict = LastUpdatedOrderedDict()
            range_dict['id'] = fl.make_request_url(new_rparams)
            range_dict['json'] = fl.make_request_url(new_rparams, '.json')
            range_dict['label'] = min_label + ' to ' + max_label
            range_dict['count'] = solr_count
            range_dict['oc-api:min-date'] = range_min_key
            range_dict['oc-api:max-date'] = range_end
            options.append(range_dict)
        date_fields.append(field)
    if len(date_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-date-facets'] = date_fields
def add_stats_ranges_from_solr(self, query):
    """ Gets solr stats and adds range facet options to query.

    Runs the stats query from self.compose_query() against self.solr
    and, for every field in the response's stats_fields that has
    stats, adds the field to query['facet.range'] and
    query['stats.field'] and sets the per-field
    'f.<field>.facet.range.*' / 'f.<field>.facet.sort' options.

    query -- the query dict to extend; it must already hold list
             values for 'facet.range' and 'stats.field'

    Returns the same query dict. It is returned unchanged when the
    raw response is not a dict or has no 'stats' / 'stats_fields'.
    """
    stats_query = self.compose_query()  # make the stats query
    response = self.solr.search(**stats_query)  # execute solr query
    solr_json = response.raw_content
    if not isinstance(solr_json, dict):
        return query
    if 'stats' not in solr_json or 'stats_fields' not in solr_json['stats']:
        return query
    qm = QueryMaker()
    for solr_field_key, stats in solr_json['stats']['stats_fields'].items():
        if stats is None:
            continue
        if solr_field_key not in query['facet.range']:
            query['facet.range'].append(solr_field_key)
        if solr_field_key not in query['stats.field']:
            query['stats.field'].append(solr_field_key)
        field_prefix = 'f.' + solr_field_key
        fstart = field_prefix + '.facet.range.start'
        fend = field_prefix + '.facet.range.end'
        fgap = field_prefix + '.facet.range.gap'
        findex = field_prefix + '.facet.sort'
        fother = field_prefix + '.facet.range.other'
        finclude = field_prefix + '.facet.range.include'
        query[fother] = 'all'
        query[finclude] = 'all'
        # start from the default group count for every field, so one
        # sparse field does not reduce the groups of the fields after it
        groups = qm.histogram_groups
        if 'count' in stats and (stats['count'] / qm.histogram_groups) < 3:
            # too few records for the default number of groups
            groups = 4
        if '___pred_date' in solr_field_key:
            query[fstart] = qm.convert_date_to_solr_date(stats['min'])
            query[fend] = qm.convert_date_to_solr_date(stats['max'])
            query[fgap] = qm.get_date_difference_for_solr(stats['min'],
                                                          stats['max'],
                                                          groups)
            query[findex] = 'index'  # sort by index, not by count
        else:
            query[fstart] = stats['min']
            query[fend] = stats['max']
            query[fgap] = (stats['max'] - stats['min']) / groups
            if query[fgap] > stats['mean']:
                query[fgap] = stats['mean'] / 3
            query[findex] = 'index'  # sort by index, not by count
    return query
def get_request_param(self, request_dict, param, default, as_list=False, solr_escape=False):
    """ Gets a string or list to use in queries.

    The value comes from either the request object or the
    internal_request object, so searches can be made without having
    to go through HTTP.

    request_dict -- dict of request parameters, or False
    param        -- key to look up in request_dict
    default      -- returned when param is absent, or when a scalar is
                    requested and the parameter holds an empty list
    as_list      -- when True the result is a list; a list value is
                    returned unchanged, a scalar is wrapped in a list
    solr_escape  -- as_list: wraps a scalar in double quotes.
                    scalar: escapes the value with
                    QueryMaker.escape_solr_arg, keeping surrounding
                    double quotes around the escaped text

    Returns False when request_dict is False.
    """
    if request_dict is False:
        return False
    if param not in request_dict:
        return default
    value = request_dict[param]
    if as_list:
        if isinstance(value, list):
            return value
        if solr_escape:
            value = '"' + value + '"'
        return [value]
    if isinstance(value, list):
        if not value:
            # a parameter without any values is treated as not given
            return default
        value = value[0]
    if not solr_escape or value == '':
        # an empty string has nothing to escape; indexing it below
        # would raise an IndexError
        return value
    qm = QueryMaker()
    if value[0] == '"' and value[-1] == '"':
        # escape only the text between the quotes, then re-quote it
        return '"' + qm.escape_solr_arg(value[1:-1]) + '"'
    return qm.escape_solr_arg(value)
def add_date_fields(self, solr_json):
    """ Adds date fields with range query options.

    Reads the 'date' ranges with self.get_solr_ranges(solr_json, 'date').
    For every ranged solr field a facet description is created with
    self.get_facet_meta and extended with the keys 'oc-api:min-date',
    'oc-api:max-date', 'oc-api:gap-date' and
    'oc-api:has-range-options' (one entry per value/count pair in
    ranges['counts']).

    When at least one field was built and 'facet' is in
    self.act_responses, the list is stored in
    self.json_ld['oc-api:has-date-facets']. Returns None.
    """
    date_facet_ranges = self.get_solr_ranges(solr_json, 'date')
    if date_facet_ranges is False:
        return
    date_fields = []
    for solr_field_key, ranges in date_facet_ranges.items():
        slug = solr_field_key.split('___')[0].replace('_', '-')
        # check to see if the field is a linked data field
        # if so, it needs some help with making Filter Links
        # (the flag is computed but not consumed in this method)
        linked_field = False
        field_entity = self.get_entity(slug)
        if field_entity is not False:
            self.add_active_facet_field(slug)
            if field_entity.item_type == 'uri':
                linked_field = True
        field = self.get_facet_meta(solr_field_key)
        field['oc-api:min-date'] = ranges['start']
        field['oc-api:max-date'] = ranges['end']
        field['oc-api:gap-date'] = ranges['gap']
        options = []
        field['oc-api:has-range-options'] = options
        qm = QueryMaker()
        # ranges['counts'] alternates range start values and their counts
        for pair_index, range_min_key in enumerate(ranges['counts'][::2]):
            solr_count = ranges['counts'][2 * pair_index + 1]
            fl = FilterLinks()
            fl.base_search_link = self.base_search_link
            fl.base_request_json = self.request_dict_json
            fl.base_r_full_path = self.request_full_path
            fl.spatial_context = self.spatial_context
            fl.partial_param_val_match = True
            dt_end = qm.add_solr_gap_to_date(range_min_key, ranges['gap'])
            range_end = qm.convert_date_to_solr_date(dt_end)
            solr_range = '[' + range_min_key + ' TO ' + range_end + ' ]'
            new_rparams = fl.add_to_request('prop', solr_range, slug)
            min_label = qm.make_human_readable_date(range_min_key)
            max_label = qm.make_human_readable_date(range_end)
            range_dict = LastUpdatedOrderedDict()
            range_dict['id'] = fl.make_request_url(new_rparams)
            range_dict['json'] = fl.make_request_url(new_rparams, '.json')
            range_dict['label'] = min_label + ' to ' + max_label
            range_dict['count'] = solr_count
            range_dict['oc-api:min-date'] = range_min_key
            range_dict['oc-api:max-date'] = range_end
            options.append(range_dict)
        date_fields.append(field)
    if len(date_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-date-facets'] = date_fields
def parse_json_record(self, json_rec):
    """ Parses json for a geo-json feature of the record.

    Properties are read from json_rec['properties'] when that key
    exists, otherwise from json_rec itself. Nothing happens when the
    properties are not a dict. Only keys that are present are copied
    to attributes of self:

      'id' (with '#' removed), 'label', 'href', 'uri' (split into
      self.item_type and self.uuid), 'project label', 'context label',
      'early bce/ce' and 'late bce/ce' (stored as non-negative ints
      with a 'BCE' suffix for negative years, else suffix False),
      'item category', 'snippet' (all markup removed except the <em>
      highlights, which become <mark>), 'thumbnail', 'published' and
      'updated' (as human readable dates).

    Returns None.
    """
    props = json_rec['properties'] if 'properties' in json_rec else json_rec
    if not isinstance(props, dict):
        return

    def split_era(year):
        # negative years are BCE and are reported as positive numbers
        if year < 0:
            return int(round(year * -1, 0)), 'BCE'
        return int(round(year, 0)), False

    if 'id' in props:
        self.id = props['id'].replace('#', '')
    if 'label' in props:
        self.label = props['label']
    if 'href' in props:
        self.href = props['href']
    if 'uri' in props:
        uri_parts = URImanagement.get_uuid_from_oc_uri(props['uri'], True)
        if isinstance(uri_parts, dict):
            self.item_type = uri_parts['item_type']
            self.uuid = uri_parts['uuid']
    if 'project label' in props:
        self.project = props['project label']
    if 'context label' in props:
        self.context = props['context label']
    if 'early bce/ce' in props:
        self.early_bce_ce = props['early bce/ce']
        self.early_bce_ce, self.early_suffix = split_era(self.early_bce_ce)
    if 'late bce/ce' in props:
        self.late_bce_ce = props['late bce/ce']
        self.late_bce_ce, self.late_suffix = split_era(self.late_bce_ce)
    if 'item category' in props:
        self.category = props['item category']
    if 'snippet' in props:
        self.snippet = props['snippet']
        # protect the highlight tags with placeholders, remove all other
        # markup, then turn the placeholders into <mark> tags
        protect = (('<em>', '[[[[mark]]]]'), ('</em>', '[[[[/mark]]]]'))
        for old, new in protect:
            self.snippet = self.snippet.replace(old, new)
        self.snippet = strip_tags(self.snippet)
        restore = (
            ('</', ''),
            ('<', ''),
            ('>', ''),
            ('[[[[mark]]]]', '<mark>'),
            ('[[[[/mark]]]]', '</mark>'),
        )
        for old, new in restore:
            self.snippet = self.snippet.replace(old, new)
    if 'thumbnail' in props:
        self.thumbnail = props['thumbnail']
    if 'published' in props:
        self.published = QueryMaker().make_human_readable_date(props['published'])
    if 'updated' in props:
        self.updated = QueryMaker().make_human_readable_date(props['updated'])
def compose_query(self, request_dict):
    """ Composes the solr search query based on the request_dict.

    Starts from defaults taken from self (rows, start, stats_fields)
    and adds filter queries ('fq'), facet fields, range facets and
    stats fields for these request parameters: sort, q, start, rows,
    path, prop, proj, the Dublin-Core parameters listed by DCterms,
    type, form-chronotile, form-start, form-stop, updated, published,
    uuid, id, images, other-media, documents, disc-geotile, disc-bbox,
    obj, response, linked, trinomial and reconcile.

    Side effects on self: default facet fields may be removed with
    self.remove_from_default_facet_fields, self.item_type_limited may
    be set to True, and self.prequery_stats may be extended.

    Returns the query dict.
    """
    qm = QueryMaker()
    child_context_join = False  # do a JOIN to include children in results
    query = {}
    query['facet'] = 'true'
    query['facet.mincount'] = 1
    query['rows'] = self.rows
    query['start'] = self.start
    query['debugQuery'] = 'false'
    query['fq'] = []
    query['facet.field'] = []
    query['facet.range'] = []
    query['stats'] = 'true'
    # copy the list: it is extended with "+=" below, which would
    # otherwise grow self.stats_fields in place on every call
    query['stats.field'] = list(self.stats_fields)
    query['sort'] = SortingOptions.DEFAULT_SOLR_SORT
    s_param = self.get_request_param(request_dict, 'sort', False, False)
    if s_param is not False:
        # add custom sorting
        sort_opts = SortingOptions()
        query['sort'] = sort_opts.make_solr_sort_param(s_param)
    # If the user does not provide a search term, search for everything
    query['q'] = '*:*'  # default search for all
    q_param = self.get_request_param(request_dict, 'q', False, False)
    if q_param is not False:
        escaped_terms = qm.prep_string_search_term(q_param)
        query['q'] = 'text:' + ' '.join(escaped_terms)
        query['q.op'] = 'AND'
        query['hl'] = 'true'
        query['hl.fl'] = 'text'
        query['hl.q'] = 'text:' + ' '.join(escaped_terms)
    start = self.get_request_param(request_dict, 'start', False, False)
    if start is not False:
        start_digits = re.sub(r'[^\d]', r'', str(start))
        if start_digits:
            # a value without any digits keeps the default start
            query['start'] = start_digits
    rows = self.get_request_param(request_dict, 'rows', False, False)
    if rows is not False:
        rows_digits = re.sub(r'[^\d]', r'', str(rows))
        if rows_digits:
            # a value without any digits keeps the default rows; the
            # digits can never be negative, so only the upper limit
            # needs to be enforced
            query['rows'] = min(int(rows_digits), self.max_rows)
    # Spatial Context
    if 'path' in request_dict and self.do_context_paths:
        self.remove_from_default_facet_fields(SolrDocument.ROOT_CONTEXT_SOLR)
        context = qm._process_spatial_context(request_dict['path'])
        query['fq'].append(context['fq'])
        query['facet.field'] += context['facet.field']  # context facet fields, always a list
    # Properties and Linked Data
    props = self.get_request_param(request_dict, 'prop', False, True)
    if props is not False:
        for act_prop in props:
            # process each prop independently.
            prop_query = qm.process_prop(act_prop)
            query['fq'] += prop_query['fq']
            query['facet.field'] += prop_query['facet.field']
            query['stats.field'] += prop_query['stats.field']
            query['facet.range'] += prop_query['facet.range']
            if 'ranges' in prop_query:
                for key, value in prop_query['ranges'].items():
                    query[key] = value
            if 'hl-queries' in prop_query:
                query['hl'] = 'true'
                query['hl.fl'] = 'text'
                for q_term in prop_query['hl-queries']:
                    if 'hl.q' in query:
                        query['hl.q'] += ' OR (' + q_term + ')'
                    else:
                        query['hl.q'] = q_term
            if 'prequery-stats' in prop_query:
                # we have fields that need a stats prequery
                self.prequery_stats += prop_query['prequery-stats']
    # Project
    proj = self.get_request_param(request_dict, 'proj', False)
    if proj is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields(SolrDocument.ROOT_PROJECT_SOLR)
        proj_query = qm.process_proj(proj)
        query['fq'] += proj_query['fq']
        query['facet.field'] += proj_query['facet.field']
    # Dublin-Core terms
    dc_query_term_exists = False
    dc_terms_obj = DCterms()
    dc_params = dc_terms_obj.get_dc_params_list()
    for dc_param in dc_params:
        dc_terms = self.get_request_param(request_dict, dc_param, False, True)
        if dc_terms is not False:
            dc_query_term_exists = True
            dc_query = qm.process_dc_term(dc_param, dc_terms)
            query['fq'] += dc_query['fq']
            query['facet.field'] += dc_query['facet.field']
            if dc_param == 'dc-temporal':
                child_context_join = False  # turn this off
    # item-types
    item_type = self.get_request_param(request_dict, 'type', False, False)
    if item_type is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields('item_type')
        # indicate that the item_type_limit is in effect
        self.item_type_limited = True
        it_query = qm.process_item_type(item_type)
        query['fq'] += it_query['fq']
        query['facet.field'] += it_query['facet.field']
    # If a item_type_limit is set, then we're doing a specialized search
    # that looks only for a certain item_type.
    if self.item_type_limit is not False:
        # indicate that the item_type_limit is in effect
        self.item_type_limited = True
        query['fq'].append('item_type:' + self.item_type_limit)
        if self.item_type_limit in self.ITEM_TYPE_ROWS:
            query['rows'] = self.ITEM_TYPE_ROWS[self.item_type_limit]
        if self.item_type_limit in self.ITEM_TYPE_FACET_MIN:
            query['facet.mincount'] = self.ITEM_TYPE_FACET_MIN[self.item_type_limit]
            if dc_query_term_exists is True and query['facet.mincount'] > 1:
                # we're already limiting by a DC terms search, so allow all
                # search facets
                query['facet.mincount'] = 1
        if self.item_type_limit in self.ITEM_TYPE_FACETFIELDS:
            for add_facet_field in self.ITEM_TYPE_FACETFIELDS[self.item_type_limit]:
                if add_facet_field not in query['facet.field']:
                    # add facet field for this type of item
                    query['facet.field'].append(add_facet_field)
    else:
        cat_field_found = False
        for item_cat_field in self.ITEM_CAT_FIELDS:
            for facet_field in query['facet.field']:
                if item_cat_field in facet_field:
                    cat_field_found = True
        if cat_field_found is False:
            query['facet.field'].append('item_type')
    # CHRONOLOGY Form Use Life (form) queries
    # now add form-use-life chronology
    form_chrono = self.get_request_param(request_dict, 'form-chronotile', False, False)
    if form_chrono is not False:
        # query for form-use-live chronological tile
        form_chrono_query = qm.process_form_use_life_chrono(form_chrono)
        query['fq'] += form_chrono_query['fq']
        query['facet.field'] += form_chrono_query['facet.field']
        query['f.form_use_life_chrono_tile.facet.limit'] = -1
    else:
        # Add default form-use-life chronology
        query = self.add_root_form_use_life_chrono(query, request_dict)
    form_start = self.get_request_param(request_dict, 'form-start', False, False)
    if form_start is not False:
        # query for form-use-live start date
        form_start_query = qm.process_form_date_chrono(form_start, 'start')
        query['fq'] += form_start_query['fq']
    form_stop = self.get_request_param(request_dict, 'form-stop', False, False)
    if form_stop is not False:
        # query for form-use-live stop date
        form_stop_query = qm.process_form_date_chrono(form_stop, 'stop')
        query['fq'] += form_stop_query['fq']
    # Updated and Published Times
    # NOTE(review): 'updated', 'published' and 'uuid' below are put into
    # the filter query as received from the request, without escaping
    updated = self.get_request_param(request_dict, 'updated', False, False)
    if updated is not False:
        # query for when the resource was updated
        query['fq'].append('updated:' + updated)
    published = self.get_request_param(request_dict, 'published', False, False)
    if published is not False:
        # query for when the resource was published
        query['fq'].append('published:' + published)
    # query by uuid, uri, or other identifier
    uuid = self.get_request_param(request_dict, 'uuid', False, False)
    if uuid is not False:
        query['fq'].append('uuid:' + uuid)
    identifier = self.get_request_param(request_dict, 'id', False, False)
    if identifier is not False:
        id_query = qm.process_id(identifier)
        query['fq'] += id_query['fq']
    # Linked media (images, documents, other) queries
    # images
    images = self.get_request_param(request_dict, 'images', False, False)
    if images is not False:
        query['fq'] += ['image_media_count:[1 TO *]']
    # other media (not images)
    other_media = self.get_request_param(request_dict, 'other-media', False, False)
    if other_media is not False:
        query['fq'] += ['other_binary_media_count:[1 TO *]']
    # documents
    documents = self.get_request_param(request_dict, 'documents', False, False)
    if documents is not False:
        query['fq'] += ['document_count:[1 TO *]']
    # Geospatial (discovery location) queries
    # now add discovery geo location
    disc_geo = self.get_request_param(request_dict, 'disc-geotile', False, False)
    if disc_geo is not False:
        disc_geo_query = qm.process_discovery_geo(disc_geo)
        query['fq'] += disc_geo_query['fq']
        query['facet.field'] += disc_geo_query['facet.field']
        query['f.discovery_geotile.facet.limit'] = -1
    else:
        # Add default geofacet
        query = self.add_root_discovery_geo(query, request_dict)
    # geospatial bounding box query
    disc_bbox = self.get_request_param(request_dict, 'disc-bbox', False, False)
    if disc_bbox is not False:
        disc_bbox_query = qm.process_discovery_bbox(disc_bbox)
        query['fq'] += disc_bbox_query['fq']
    # get items with a URI (or slug) identified object
    obj = self.get_request_param(request_dict, 'obj', False)
    if obj is not False:
        obj_query = qm.process_ld_object(obj)
        query['fq'] += obj_query['fq']
    # -----------------------------------------
    # Add default facet fields, used for most searches
    # -----------------------------------------
    query = self.add_default_facet_fields(query, request_dict)
    # -----------------------------------------
    # Additional, dataset specific specialized queries
    # -----------------------------------------
    # special queries (to simplify access to specific datasets)
    spsearch = SpecialSearches()
    response = self.get_request_param(request_dict, 'response', False, False)
    if response is not False:
        if 'geo-project' in response:
            # request for special handling of project facets with
            # added geospatial and chronological metadata
            query = spsearch.process_geo_projects(query)
    linked = self.get_request_param(request_dict, 'linked', False, False)
    if linked == 'dinaa-cross-ref':
        query = spsearch.process_linked_dinaa(query)
    trinomial = self.get_request_param(request_dict, 'trinomial', False, False)
    if trinomial is not False:
        query = spsearch.process_trinonial_reconcile(trinomial, query)
    reconcile = self.get_request_param(request_dict, 'reconcile', False, True)
    if reconcile is not False:
        query = spsearch.process_reconcile(reconcile, query)
    if len(self.prequery_stats) > 0:
        # we have fields that need a stats prequery
        statsq = StatsQuery()
        statsq.q = query['q']
        if 'q.op' in query:
            statsq.q_op = query['q.op']
        statsq.fq = query['fq']
        statsq.stats_fields = self.prequery_stats
        query = statsq.add_stats_ranges_from_solr(query)
    if child_context_join:
        # child_context_join is never set to True in this method, so
        # this JOIN is currently not applied
        all_fq = False
        for fq in query['fq']:
            if all_fq is False:
                all_fq = '(' + fq + ')'
            else:
                all_fq += ' AND (' + fq + ')'
        all_fq = '(' + all_fq + ')'
        joined_fq = '{!join from=slug_type_uri_label to=obj_all___context_id}' + all_fq
        query['fq'] = all_fq + ' OR _query_:"' + joined_fq + '"'
    # now clean the stats fields to make sure we're not repeating ourselves
    if len(query['stats.field']) > 0:
        unique_stats_fields = []
        for stats_field in query['stats.field']:
            if stats_field not in unique_stats_fields:
                unique_stats_fields.append(stats_field)
        query['stats.field'] = unique_stats_fields
    return query
def make_sorted_facet_list(self, pre_sort_facets):
    """ Makes a list of sorted facets from the pre_sort_facets dict.

    pre_sort_facets maps facet id keys to facet objects. The order of
    the result is:
      1. facets matching the 'prop' request parameters (last given
         prop first), when 'prop' is in self.request_dict
      2. facets whose key contains '#facet-context'
      3. the '#facet-item-type' facet
      4. facets whose key contains '#facet-prop-oc-gen-'
      5. facets whose key contains '#facet-project'
      6. the '#facet-prop-ld' facet
      7. the '#facet-prop-var' facet
      8. all remaining facets, in dict order
      9. self.rel_media_facet, unless it is False

    Note that the facets placed in steps 1 to 7 are removed from
    pre_sort_facets.
    """
    ordered = []
    claimed = []

    def claim(key):
        # put the facet in the result and remember that its key is used
        ordered.append(pre_sort_facets[key])
        claimed.append(key)

    def claim_containing(fragment):
        for key in pre_sort_facets:
            if fragment in key and key not in claimed:
                claim(key)

    def claim_exact(key):
        if key in pre_sort_facets and key not in claimed:
            claim(key)

    if 'prop' in self.request_dict:
        # first check for 'prop' related facets
        # these get promoted to the first positions in the list
        qm = QueryMaker()
        # reversed, so the last props come first
        for param_val in self.request_dict['prop'][::-1]:
            param_paths = qm.expand_hierarchy_options(param_val)
            for key in pre_sort_facets:
                for param_slugs in param_paths:
                    # the facet id has the last slug of the path
                    if param_slugs[-1] in key and key not in claimed:
                        claim(key)
    claim_containing('#facet-context')
    claim_exact('#facet-item-type')
    claim_containing('#facet-prop-oc-gen-')
    claim_containing('#facet-project')
    claim_exact('#facet-prop-ld')
    claim_exact('#facet-prop-var')
    # delete all the used facets by key
    for key in claimed:
        pre_sort_facets.pop(key, None)
    # add remaining (unsorted) facets
    ordered.extend(pre_sort_facets.values())
    if self.rel_media_facet is not False:
        # add the related media facet
        ordered.append(self.rel_media_facet)
    return ordered
def make_sorted_facet_list(self, pre_sort_facets):
    """ Makes a list of sorted facets from the pre_sort_facets dict.

    pre_sort_facets maps facet id keys to facet objects. The order of
    the result is:
      1. facets matching the 'prop' request parameters (last given
         prop first), when 'prop' is in self.request_dict
      2. facets whose key contains '#facet-context'
      3. the '#facet-item-type' facet
      4. facets whose key contains '#facet-prop-oc-gen-'
      5. facets whose key contains '#facet-project'
      6. the '#facet-prop-ld' facet
      7. the '#facet-prop-var' facet
      8. all remaining facets, in dict order
      9. self.rel_media_facet, unless it is False

    Note that the facets placed in steps 1 to 7 are removed from
    pre_sort_facets.
    """
    ordered = []
    claimed = []

    def claim(key):
        # put the facet in the result and remember that its key is used
        ordered.append(pre_sort_facets[key])
        claimed.append(key)

    def claim_containing(fragment):
        for key in pre_sort_facets:
            if fragment in key and key not in claimed:
                claim(key)

    def claim_exact(key):
        if key in pre_sort_facets and key not in claimed:
            claim(key)

    if 'prop' in self.request_dict:
        # first check for 'prop' related facets
        # these get promoted to the first positions in the list
        qm = QueryMaker()
        # reversed, so the last props come first
        for param_val in self.request_dict['prop'][::-1]:
            param_paths = qm.expand_hierarchy_options(param_val)
            for key in pre_sort_facets:
                for param_slugs in param_paths:
                    # the facet id has the last slug of the path
                    if param_slugs[-1] in key and key not in claimed:
                        claim(key)
    claim_containing('#facet-context')
    claim_exact('#facet-item-type')
    claim_containing('#facet-prop-oc-gen-')
    claim_containing('#facet-project')
    claim_exact('#facet-prop-ld')
    claim_exact('#facet-prop-var')
    # delete all the used facets by key
    for key in claimed:
        pre_sort_facets.pop(key, None)
    # add remaining (unsorted) facets
    ordered.extend(pre_sort_facets.values())
    if self.rel_media_facet is not False:
        # add the related media facet
        ordered.append(self.rel_media_facet)
    return ordered
def compose_query(self, request_dict):
    """ composes the search query based on the request_dict

        Reads search parameters from request_dict (through
        self.get_request_param) and translates them into a dictionary
        of solr query arguments.

        Side effects visible in this method: fields are removed through
        self.remove_from_default_facet_fields, self.prequery_stats is
        extended, and qm.entities is passed to self.gather_entities.

        :param request_dict: dictionary of request parameters
        :return: dictionary of solr query arguments
    """
    qm = QueryMaker()
    # do a JOIN to include children in results. Nothing in this method
    # turns it on, so the join at the end is currently inactive.
    child_context_join = False
    query = {
        "facet": "true",
        "facet.mincount": 1,
        "rows": self.rows,
        "start": self.start,
        "debugQuery": "false",
        "fq": [],
        "facet.field": [],
        "facet.range": [],
        "stats": "true",
        "stats.field": ["updated", "published"],
        "sort": "interest_score desc",
    }
    s_param = self.get_request_param(request_dict, "sort", False, False)
    if s_param is not False:
        # add custom sorting
        query["sort"] = s_param
    # If the user does not provide a search term, search for everything
    query["q"] = "*:*"  # default search for all
    q_param = self.get_request_param(request_dict, "q", False, False)
    if q_param is not False:
        escaped_terms = qm.prep_string_search_term(q_param)
        query["q"] = " ".join(escaped_terms)
        query["q.op"] = "AND"
        query["hl"] = "true"
        query["hl.fl"] = "text"
        query["hl.q"] = " ".join(escaped_terms)
    start = self.get_request_param(request_dict, "start", False, False)
    if start is not False:
        # str() so values that are not strings (internal requests)
        # do not break the regular expression
        start_digits = re.sub(r"[^\d]", r"", str(start))
        if start_digits:
            # keep the default start if the parameter had no digits
            query["start"] = start_digits
    rows = self.get_request_param(request_dict, "rows", False, False)
    if rows is not False:
        rows_digits = re.sub(r"[^\d]", r"", str(rows))
        if rows_digits:
            # only digits remain, so the value can not be negative;
            # it only needs to be capped at the allowed maximum
            query["rows"] = min(int(rows_digits), self.max_rows)
    # Spatial Context
    if "path" in request_dict and self.do_context_paths:
        self.remove_from_default_facet_fields(SolrDocument.ROOT_CONTEXT_SOLR)
        context = qm._process_spatial_context(request_dict["path"])
        query["fq"].append(context["fq"])
        # context facet fields, always a list
        query["facet.field"] += context["facet.field"]
    # Properties and Linked Data
    props = self.get_request_param(request_dict, "prop", False, True)
    if props is not False:
        for act_prop in props:
            # process each prop independently.
            prop_query = qm.process_prop(act_prop)
            query["fq"] += prop_query["fq"]
            query["facet.field"] += prop_query["facet.field"]
            query["stats.field"] += prop_query["stats.field"]
            query["facet.range"] += prop_query["facet.range"]
            if "ranges" in prop_query:
                for key, value in prop_query["ranges"].items():
                    query[key] = value
            if "hl-queries" in prop_query:
                query["hl"] = "true"
                query["hl.fl"] = "text"
                # query['hl.snippets'] = 2
                for q_term in prop_query["hl-queries"]:
                    if "hl.q" in query:
                        query["hl.q"] += " OR (" + q_term + ")"
                    else:
                        query["hl.q"] = q_term
            if "prequery-stats" in prop_query:
                # we have fields that need a stats prequery
                self.prequery_stats += prop_query["prequery-stats"]
    # Project
    proj = self.get_request_param(request_dict, "proj", False)
    if proj is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields(SolrDocument.ROOT_PROJECT_SOLR)
        proj_query = qm.process_proj(proj)
        query["fq"] += proj_query["fq"]
        query["facet.field"] += proj_query["facet.field"]
    # Dublin-Core terms
    dc_terms_obj = DCterms()
    dc_params = dc_terms_obj.get_dc_params_list()
    for dc_param in dc_params:
        dc_terms = self.get_request_param(request_dict, dc_param, False, True)
        if dc_terms is not False:
            dc_query = qm.process_dc_term(dc_param, dc_terms)
            query["fq"] += dc_query["fq"]
            query["facet.field"] += dc_query["facet.field"]
            if dc_param == "dc-temporal":
                child_context_join = False  # turn this off
    # item-types
    item_type = self.get_request_param(request_dict, "type", False, False)
    if item_type is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields("item_type")
        it_query = qm.process_item_type(item_type)
        query["fq"] += it_query["fq"]
        query["facet.field"] += it_query["facet.field"]
    # If a item_type_limit is set, then we're doing a specialized search
    # that looks only for a certain item_type.
    if self.item_type_limit is not False:
        query["fq"].append("item_type:" + self.item_type_limit)
        if self.item_type_limit in self.ITEM_TYPE_ROWS:
            query["rows"] = self.ITEM_TYPE_ROWS[self.item_type_limit]
        if self.item_type_limit in self.ITEM_TYPE_FACET_MIN:
            query["facet.mincount"] = \
                self.ITEM_TYPE_FACET_MIN[self.item_type_limit]
        if self.item_type_limit in self.ITEM_TYPE_FACETFIELDS:
            for add_facet_field in \
                    self.ITEM_TYPE_FACETFIELDS[self.item_type_limit]:
                if add_facet_field not in query["facet.field"]:
                    # add facet field for this type of item
                    query["facet.field"].append(add_facet_field)
    # CHRONOLOGY Form Use Life (form) queries
    # now add form-use-life chronology
    form_chrono = self.get_request_param(request_dict,
                                         "form-chronotile",
                                         False,
                                         False)
    if form_chrono is not False:
        # query for form-use-life chronological tile
        form_chrono_query = qm.process_form_use_life_chrono(form_chrono)
        query["fq"] += form_chrono_query["fq"]
        query["facet.field"] += form_chrono_query["facet.field"]
        query["f.form_use_life_chrono_tile.facet.limit"] = -1
    else:
        # Add default form-use-life chronology
        query = self.add_root_form_use_life_chrono(query, request_dict)
    form_start = self.get_request_param(request_dict,
                                        "form-start",
                                        False,
                                        False)
    if form_start is not False:
        # query for form-use-life start date, using the requested value
        form_start_query = qm.process_form_date_chrono(form_start, "start")
        query["fq"] += form_start_query["fq"]
    form_stop = self.get_request_param(request_dict,
                                       "form-stop",
                                       False,
                                       False)
    if form_stop is not False:
        # query for form-use-life stop date, using the requested value
        form_stop_query = qm.process_form_date_chrono(form_stop, "stop")
        query["fq"] += form_stop_query["fq"]
    # Linked media (images, documents, other) queries
    # images
    images = self.get_request_param(request_dict, "images", False, False)
    if images is not False:
        query["fq"] += ["image_media_count:[1 TO *]"]
    # other media (not images)
    other_media = self.get_request_param(request_dict,
                                         "other-media",
                                         False,
                                         False)
    if other_media is not False:
        query["fq"] += ["other_binary_media_count:[1 TO *]"]
    # documents
    documents = self.get_request_param(request_dict,
                                       "documents",
                                       False,
                                       False)
    if documents is not False:
        query["fq"] += ["document_count:[1 TO *]"]
    # Geospatial (discovery location) queries
    # now add discovery geo location
    disc_geo = self.get_request_param(request_dict,
                                      "disc-geotile",
                                      False,
                                      False)
    if disc_geo is not False:
        disc_geo_query = qm.process_discovery_geo(disc_geo)
        query["fq"] += disc_geo_query["fq"]
        query["facet.field"] += disc_geo_query["facet.field"]
        query["f.discovery_geotile.facet.limit"] = -1
    else:
        # Add default geofacet
        query = self.add_root_discovery_geo(query, request_dict)
    # geospatial bounding box query
    disc_bbox = self.get_request_param(request_dict,
                                       "disc-bbox",
                                       False,
                                       False)
    if disc_bbox is not False:
        disc_bbox_query = qm.process_discovery_bbox(disc_bbox)
        query["fq"] += disc_bbox_query["fq"]
    # get items with a URI (or slug) identified object
    obj = self.get_request_param(request_dict, "obj", False)
    if obj is not False:
        obj_query = qm.process_ld_object(obj)
        query["fq"] += obj_query["fq"]
    # -----------------------------------------
    # Add default facet fields, used for most searches
    # -----------------------------------------
    query = self.add_default_facet_fields(query, request_dict)
    # -----------------------------------------
    # Additional, dataset specific specialized queries
    # -----------------------------------------
    # special queries (to simplify access to specific datasets)
    spsearch = SpecialSearches()
    response = self.get_request_param(request_dict, "response", False, False)
    if response is not False:
        if "geo-project" in response:
            # request for special handling of project facets with
            # added geospatial and chronological metadata
            query = spsearch.process_geo_projects(query)
    linked = self.get_request_param(request_dict, "linked", False, False)
    if linked == "dinaa-cross-ref":
        query = spsearch.process_linked_dinaa(query)
    trinomial = self.get_request_param(request_dict,
                                       "trinomial",
                                       False,
                                       False)
    if trinomial is not False:
        query = spsearch.process_trinonial_reconcile(trinomial, query)
    reconcile = self.get_request_param(request_dict,
                                       "reconcile",
                                       False,
                                       True)
    if reconcile is not False:
        query = spsearch.process_reconcile(reconcile, query)
    if len(self.prequery_stats) > 0:
        # we have fields that need a stats prequery
        statsq = StatsQuery()
        statsq.q = query["q"]
        if "q.op" in query:
            statsq.q_op = query["q.op"]
        statsq.fq = query["fq"]
        statsq.stats_fields = self.prequery_stats
        query = statsq.add_stats_ranges_from_solr(query)
    # Now set aside entities used as search filters
    self.gather_entities(qm.entities)
    if child_context_join and query["fq"]:
        # AND together all the filter queries, then also match records
        # reached through the join. Note this replaces the list of
        # filter queries with a single string.
        all_fq = " AND ".join("(" + fq + ")" for fq in query["fq"])
        all_fq = "(" + all_fq + ")"
        joined_fq = "{!join from=slug_type_uri_label" \
                    " to=obj_all___context_id}" + all_fq
        query["fq"] = all_fq + ' OR _query_:"' + joined_fq + '"'
    return query
def compose_query(self, request_dict):
    """ composes the search query based on the request_dict

        Reads search parameters from request_dict (through
        self.get_request_param) and translates them into a dictionary
        of solr query arguments.

        Side effects visible in this method: fields are removed through
        self.remove_from_default_facet_fields, self.prequery_stats is
        extended, and self.item_type_limited is set to True when the
        search is limited by item type.

        :param request_dict: dictionary of request parameters
        :return: dictionary of solr query arguments
    """
    qm = QueryMaker()
    # do a JOIN to include children in results. Nothing in this method
    # turns it on, so the join near the end is currently inactive.
    child_context_join = False
    query = {
        'facet': 'true',
        'facet.mincount': 1,
        'rows': self.rows,
        'start': self.start,
        'debugQuery': 'false',
        'fq': [],
        'facet.field': [],
        'facet.range': [],
        'stats': 'true',
        # copy, so the "+=" updates below extend only this query's
        # list and never self.stats_fields itself
        'stats.field': list(self.stats_fields),
        'sort': SortingOptions.DEFAULT_SOLR_SORT,
    }
    s_param = self.get_request_param(request_dict, 'sort', False, False)
    if s_param is not False:
        # add custom sorting
        sort_opts = SortingOptions()
        query['sort'] = sort_opts.make_solr_sort_param(s_param)
    # If the user does not provide a search term, search for everything
    query['q'] = '*:*'  # default search for all
    q_param = self.get_request_param(request_dict, 'q', False, False)
    if q_param is not False:
        escaped_terms = qm.prep_string_search_term(q_param)
        query['q'] = 'text:' + ' '.join(escaped_terms)
        query['q.op'] = 'AND'
        query['hl'] = 'true'
        query['hl.fl'] = 'text'
        query['hl.q'] = 'text:' + ' '.join(escaped_terms)
    start = self.get_request_param(request_dict, 'start', False, False)
    if start is not False:
        start_digits = re.sub(r'[^\d]', r'', str(start))
        if start_digits:
            # keep the default start if the parameter had no digits
            query['start'] = start_digits
    rows = self.get_request_param(request_dict, 'rows', False, False)
    if rows is not False:
        rows_digits = re.sub(r'[^\d]', r'', str(rows))
        if rows_digits:
            # only digits remain, so the value can not be negative;
            # it only needs to be capped at the allowed maximum
            query['rows'] = min(int(rows_digits), self.max_rows)
    # Spatial Context
    if 'path' in request_dict and self.do_context_paths:
        self.remove_from_default_facet_fields(
            SolrDocument.ROOT_CONTEXT_SOLR)
        context = qm._process_spatial_context(request_dict['path'])
        query['fq'].append(context['fq'])
        # context facet fields, always a list
        query['facet.field'] += context['facet.field']
    # Properties and Linked Data
    props = self.get_request_param(request_dict, 'prop', False, True)
    if props is not False:
        for act_prop in props:
            # process each prop independently.
            prop_query = qm.process_prop(act_prop)
            query['fq'] += prop_query['fq']
            query['facet.field'] += prop_query['facet.field']
            query['stats.field'] += prop_query['stats.field']
            query['facet.range'] += prop_query['facet.range']
            if 'ranges' in prop_query:
                for key, value in prop_query['ranges'].items():
                    query[key] = value
            if 'hl-queries' in prop_query:
                query['hl'] = 'true'
                query['hl.fl'] = 'text'
                # query['hl.snippets'] = 2
                for q_term in prop_query['hl-queries']:
                    if 'hl.q' in query:
                        query['hl.q'] += ' OR (' + q_term + ')'
                    else:
                        query['hl.q'] = q_term
            if 'prequery-stats' in prop_query:
                # we have fields that need a stats prequery
                self.prequery_stats += prop_query['prequery-stats']
    # Project
    proj = self.get_request_param(request_dict, 'proj', False)
    if proj is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields(
            SolrDocument.ROOT_PROJECT_SOLR)
        proj_query = qm.process_proj(proj)
        query['fq'] += proj_query['fq']
        query['facet.field'] += proj_query['facet.field']
    # Dublin-Core terms
    dc_query_term_exists = False
    dc_terms_obj = DCterms()
    dc_params = dc_terms_obj.get_dc_params_list()
    for dc_param in dc_params:
        dc_terms = self.get_request_param(request_dict,
                                          dc_param,
                                          False,
                                          True)
        if dc_terms is not False:
            dc_query_term_exists = True
            dc_query = qm.process_dc_term(dc_param, dc_terms)
            query['fq'] += dc_query['fq']
            query['facet.field'] += dc_query['facet.field']
            if dc_param == 'dc-temporal':
                child_context_join = False  # turn this off
    # item-types
    item_type = self.get_request_param(request_dict, 'type', False, False)
    if item_type is not False:
        # remove the facet field, since we're already filtering with it
        self.remove_from_default_facet_fields('item_type')
        # indicate that the item_type_limit is in effect
        self.item_type_limited = True
        it_query = qm.process_item_type(item_type)
        query['fq'] += it_query['fq']
        query['facet.field'] += it_query['facet.field']
    # If a item_type_limit is set, then we're doing a specialized search
    # that looks only for a certain item_type.
    if self.item_type_limit is not False:
        # indicate that the item_type_limit is in effect
        self.item_type_limited = True
        query['fq'].append('item_type:' + self.item_type_limit)
        if self.item_type_limit in self.ITEM_TYPE_ROWS:
            query['rows'] = self.ITEM_TYPE_ROWS[self.item_type_limit]
        if self.item_type_limit in self.ITEM_TYPE_FACET_MIN:
            query['facet.mincount'] = \
                self.ITEM_TYPE_FACET_MIN[self.item_type_limit]
            if dc_query_term_exists is True \
               and query['facet.mincount'] > 1:
                # we're already limiting by a DC terms search, so allow
                # all search facets
                query['facet.mincount'] = 1
        if self.item_type_limit in self.ITEM_TYPE_FACETFIELDS:
            for add_facet_field in \
                    self.ITEM_TYPE_FACETFIELDS[self.item_type_limit]:
                if add_facet_field not in query['facet.field']:
                    # add facet field for this type of item
                    query['facet.field'].append(add_facet_field)
    else:
        # no item type limit: facet on item_type, unless one of the
        # item category fields is already among the facet fields
        cat_field_found = any(
            item_cat_field in facet_field
            for item_cat_field in self.ITEM_CAT_FIELDS
            for facet_field in query['facet.field']
        )
        if not cat_field_found:
            query['facet.field'].append('item_type')
    # CHRONOLOGY Form Use Life (form) queries
    # now add form-use-life chronology
    form_chrono = self.get_request_param(request_dict,
                                         'form-chronotile',
                                         False,
                                         False)
    if form_chrono is not False:
        # query for form-use-life chronological tile
        form_chrono_query = qm.process_form_use_life_chrono(form_chrono)
        query['fq'] += form_chrono_query['fq']
        query['facet.field'] += form_chrono_query['facet.field']
        query['f.form_use_life_chrono_tile.facet.limit'] = -1
    else:
        # Add default form-use-life chronology
        query = self.add_root_form_use_life_chrono(query, request_dict)
    form_start = self.get_request_param(request_dict,
                                        'form-start',
                                        False,
                                        False)
    if form_start is not False:
        # query for form-use-life start date
        form_start_query = qm.process_form_date_chrono(form_start, 'start')
        query['fq'] += form_start_query['fq']
    form_stop = self.get_request_param(request_dict,
                                       'form-stop',
                                       False,
                                       False)
    if form_stop is not False:
        # query for form-use-life stop date
        form_stop_query = qm.process_form_date_chrono(form_stop, 'stop')
        query['fq'] += form_stop_query['fq']
    # Updated and Published Times
    # NOTE(review): the updated, published and uuid values are added to
    # the filter queries without escaping - confirm that this is safe
    # for values that come straight from a request.
    updated = self.get_request_param(request_dict, 'updated', False, False)
    if updated is not False:
        # query for when the resource was updated
        query['fq'].append('updated:' + updated)
    published = self.get_request_param(request_dict,
                                       'published',
                                       False,
                                       False)
    if published is not False:
        # query for when the resource was published
        query['fq'].append('published:' + published)
    # query by uuid, uri, or other identifier
    uuid = self.get_request_param(request_dict, 'uuid', False, False)
    if uuid is not False:
        query['fq'].append('uuid:' + uuid)
    identifier = self.get_request_param(request_dict, 'id', False, False)
    if identifier is not False:
        id_query = qm.process_id(identifier)
        query['fq'] += id_query['fq']
    # Linked media (images, documents, other) queries
    # images
    images = self.get_request_param(request_dict, 'images', False, False)
    if images is not False:
        query['fq'] += ['image_media_count:[1 TO *]']
    # other media (not images)
    other_media = self.get_request_param(request_dict,
                                         'other-media',
                                         False,
                                         False)
    if other_media is not False:
        query['fq'] += ['other_binary_media_count:[1 TO *]']
    # documents
    documents = self.get_request_param(request_dict,
                                       'documents',
                                       False,
                                       False)
    if documents is not False:
        query['fq'] += ['document_count:[1 TO *]']
    # Geospatial (discovery location) queries
    # now add discovery geo location
    disc_geo = self.get_request_param(request_dict,
                                      'disc-geotile',
                                      False,
                                      False)
    if disc_geo is not False:
        disc_geo_query = qm.process_discovery_geo(disc_geo)
        query['fq'] += disc_geo_query['fq']
        query['facet.field'] += disc_geo_query['facet.field']
        query['f.discovery_geotile.facet.limit'] = -1
    else:
        # Add default geofacet
        query = self.add_root_discovery_geo(query, request_dict)
    # geospatial bounding box query
    disc_bbox = self.get_request_param(request_dict,
                                       'disc-bbox',
                                       False,
                                       False)
    if disc_bbox is not False:
        disc_bbox_query = qm.process_discovery_bbox(disc_bbox)
        query['fq'] += disc_bbox_query['fq']
    # get items with a URI (or slug) identified object
    obj = self.get_request_param(request_dict, 'obj', False)
    if obj is not False:
        obj_query = qm.process_ld_object(obj)
        query['fq'] += obj_query['fq']
    # -----------------------------------------
    # Add default facet fields, used for most searches
    # -----------------------------------------
    query = self.add_default_facet_fields(query, request_dict)
    # -----------------------------------------
    # Additional, dataset specific specialized queries
    # -----------------------------------------
    # special queries (to simplify access to specific datasets)
    spsearch = SpecialSearches()
    response = self.get_request_param(request_dict,
                                      'response',
                                      False,
                                      False)
    if response is not False:
        if 'geo-project' in response:
            # request for special handling of project facets with
            # added geospatial and chronological metadata
            query = spsearch.process_geo_projects(query)
    linked = self.get_request_param(request_dict, 'linked', False, False)
    if linked == 'dinaa-cross-ref':
        query = spsearch.process_linked_dinaa(query)
    trinomial = self.get_request_param(request_dict,
                                       'trinomial',
                                       False,
                                       False)
    if trinomial is not False:
        query = spsearch.process_trinonial_reconcile(trinomial, query)
    reconcile = self.get_request_param(request_dict,
                                       'reconcile',
                                       False,
                                       True)
    if reconcile is not False:
        query = spsearch.process_reconcile(reconcile, query)
    if len(self.prequery_stats) > 0:
        # we have fields that need a stats prequery
        statsq = StatsQuery()
        statsq.q = query['q']
        if 'q.op' in query:
            statsq.q_op = query['q.op']
        statsq.fq = query['fq']
        statsq.stats_fields = self.prequery_stats
        query = statsq.add_stats_ranges_from_solr(query)
    if child_context_join and query['fq']:
        # AND together all the filter queries, then also match records
        # reached through the join. Note this replaces the list of
        # filter queries with a single string.
        all_fq = ' AND '.join('(' + fq + ')' for fq in query['fq'])
        all_fq = '(' + all_fq + ')'
        joined_fq = '{!join from=slug_type_uri_label' \
                    ' to=obj_all___context_id}' + all_fq
        query['fq'] = all_fq + ' OR _query_:"' + joined_fq + '"'
    # now clean the stats fields to make sure we're not repeating
    # ourselves (first occurrence wins, order is kept)
    unique_stats_fields = []
    for stats_field in query['stats.field']:
        if stats_field not in unique_stats_fields:
            unique_stats_fields.append(stats_field)
    query['stats.field'] = unique_stats_fields
    return query