def make_specialized_facet_value_obj(self, solr_facet_key, solr_facet_value_key, solr_facet_count):
    """Make a facet-value dict for specialized (non-encoded) solr facets.

    Builds filter links for the facet value and, for 'item_type' facets
    with a known type URI, resolves a human-readable label via the
    entity lookup.
    """
    links = FilterLinks()
    links.base_search_link = self.base_search_link
    links.base_request_json = self.request_dict_json
    links.base_r_full_path = self.request_full_path
    links.spatial_context = self.spatial_context
    request_params = links.add_to_request_by_solr_field(solr_facet_key,
                                                        solr_facet_value_key)
    facet_val = LastUpdatedOrderedDict()
    facet_val['id'] = links.make_request_url(request_params)
    facet_val['json'] = links.make_request_url(request_params, '.json')
    # default label; only 'item_type' facets can resolve a real label
    facet_val['label'] = False
    if (solr_facet_key == 'item_type'
            and solr_facet_value_key in QueryMaker.TYPE_URIS):
        facet_val['rdfs:isDefinedBy'] = QueryMaker.TYPE_URIS[solr_facet_value_key]
        ent = self.get_entity(facet_val['rdfs:isDefinedBy'])
        if ent is not False:
            facet_val['label'] = ent.label
    facet_val['count'] = solr_facet_count
    facet_val['slug'] = solr_facet_value_key
    facet_val['data-type'] = 'id'
    return facet_val
def add_date_fields(self, solr_json):
    """Add date-range facet fields (with query/filter links) to the output.

    Reads solr 'date' range facets from solr_json, then for each field
    emits min/max/gap metadata plus one range option per solr bucket.
    Results go into self.json_ld['oc-api:has-date-facets'] when the
    'facet' response type is active.
    """
    date_fields = []
    date_facet_ranges = self.get_solr_ranges(solr_json, 'date')
    if date_facet_ranges is not False:
        for solr_field_key, ranges in date_facet_ranges.items():
            # solr field keys encode the slug before '___'
            facet_key_list = solr_field_key.split('___')
            slug = facet_key_list[0].replace('_', '-')
            # check to see if the field is a linked data field;
            # if so, it needs some help with making Filter Links
            linked_field = False
            field_entity = self.get_entity(slug)
            if field_entity is not False:
                self.add_active_facet_field(slug)
                if field_entity.item_type == 'uri':
                    linked_field = True
            field = self.get_facet_meta(solr_field_key)
            field['oc-api:min-date'] = ranges['start']
            field['oc-api:max-date'] = ranges['end']
            field['oc-api:gap-date'] = ranges['gap']
            field['oc-api:has-range-options'] = []
            # ranges['counts'] alternates [bucket_start, count, ...];
            # iterate starts with [::2], track count index with i
            i = -1
            qm = QueryMaker()
            for range_min_key in ranges['counts'][::2]:
                i += 2
                solr_count = ranges['counts'][i]
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                fl.partial_param_val_match = True
                # bucket end = bucket start + solr gap
                dt_end = qm.add_solr_gap_to_date(range_min_key, ranges['gap'])
                range_end = qm.convert_date_to_solr_date(dt_end)
                solr_range = '[' + range_min_key + ' TO ' + range_end + ' ]'
                new_rparams = fl.add_to_request('prop', solr_range, slug)
                range_dict = LastUpdatedOrderedDict()
                range_dict['id'] = fl.make_request_url(new_rparams)
                range_dict['json'] = fl.make_request_url(
                    new_rparams, '.json')
                range_dict['label'] = qm.make_human_readable_date(
                    range_min_key) + ' to ' + qm.make_human_readable_date(
                    range_end)
                range_dict['count'] = solr_count
                range_dict['oc-api:min-date'] = range_min_key
                range_dict['oc-api:max-date'] = range_end
                field['oc-api:has-range-options'].append(range_dict)
            date_fields.append(field)
    if len(date_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-date-facets'] = date_fields
def make_rel_media_option(self, rel_media_count, rel_media_type, rel_media_options):
    """Append a facet option for related media to rel_media_options.

    Only adds an option when rel_media_count is positive. The option
    gets filter links, a human label for known media types, and the
    count. Returns the (possibly extended) rel_media_options list.
    """
    # label text for the known related-media facet types
    type_labels = {
        'images': 'Linked with images',
        'other-media': 'Linked with media (non-image)',
        'documents': 'Linked with documents',
    }
    if rel_media_count > 0:
        links = FilterLinks()
        links.base_search_link = self.base_search_link
        links.base_request_json = self.request_dict_json
        links.base_r_full_path = self.request_full_path
        links.spatial_context = self.spatial_context
        option = LastUpdatedOrderedDict()
        request_params = links.add_to_request_by_solr_field(rel_media_type, '1')
        option['id'] = links.make_request_url(request_params)
        option['json'] = links.make_request_url(request_params, '.json')
        if rel_media_type in type_labels:
            option['label'] = type_labels[rel_media_type]
        option['count'] = rel_media_count
        rel_media_options.append(option)
    return rel_media_options
def make_specialized_facet_value_obj(self, solr_facet_key, solr_facet_value_key, solr_facet_count):
    """Make a facet-value dict for specialized (non-encoded) solr facets.

    Builds filter links for the facet value; for 'item_type' facets with
    a known type URI, also resolves a label via the entity lookup.
    """
    fl = FilterLinks()
    fl.base_search_link = self.base_search_link
    fl.base_request_json = self.request_dict_json
    fl.base_r_full_path = self.request_full_path
    fl.spatial_context = self.spatial_context
    new_rparams = fl.add_to_request_by_solr_field(solr_facet_key,
                                                  solr_facet_value_key)
    output = LastUpdatedOrderedDict()
    output['id'] = fl.make_request_url(new_rparams)
    output['json'] = fl.make_request_url(new_rparams, '.json')
    # label stays False unless resolved below for item_type facets
    output['label'] = False
    if solr_facet_key == 'item_type':
        if solr_facet_value_key in QueryMaker.TYPE_URIS:
            output['rdfs:isDefinedBy'] = QueryMaker.TYPE_URIS[solr_facet_value_key]
            entity = self.get_entity(output['rdfs:isDefinedBy'])
            if entity is not False:
                output['label'] = entity.label
    output['count'] = solr_facet_count
    output['slug'] = solr_facet_value_key
    output['data-type'] = 'id'
    return output
def add_numeric_fields(self, solr_json):
    """Add numeric-range facet fields (with query/filter links) to the output.

    Reads solr 'numeric' range facets from solr_json; each field gets
    min/max/gap metadata and one range option per solr bucket. Results
    go into self.json_ld['oc-api:has-numeric-facets'] when the 'facet'
    response type is active.
    """
    num_fields = []
    num_facet_ranges = self.get_solr_ranges(solr_json, 'numeric')
    if num_facet_ranges is not False:
        for solr_field_key, ranges in num_facet_ranges.items():
            # solr field keys encode the slug before '___'
            facet_key_list = solr_field_key.split('___')
            slug = facet_key_list[0].replace('_', '-')
            # check to see if the field is a linked data field;
            # if so, it needs some help with making Filter Links
            linked_field = False
            field_entity = self.get_entity(slug)
            if field_entity is not False:
                self.add_active_facet_field(slug)
                if field_entity.item_type == 'uri':
                    linked_field = True
            field = self.get_facet_meta(solr_field_key)
            field['oc-api:min'] = float(ranges['start'])
            field['oc-api:max'] = float(ranges['end'])
            gap = float(ranges['gap'])
            field['oc-api:gap'] = gap
            field['oc-api:has-range-options'] = []
            # ranges['counts'] alternates [bucket_start, count, ...];
            # iterate starts with [::2], track count index with i
            i = -1
            for range_min_key in ranges['counts'][::2]:
                i += 2
                solr_count = ranges['counts'][i]
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                fl.partial_param_val_match = True
                range_start = float(range_min_key)
                range_end = range_start + gap
                solr_range = '[' + str(range_start) + ' TO ' + str(
                    range_end) + ' ]'
                new_rparams = fl.add_to_request('prop', solr_range, slug)
                range_dict = LastUpdatedOrderedDict()
                range_dict['id'] = fl.make_request_url(new_rparams)
                range_dict['json'] = fl.make_request_url(
                    new_rparams, '.json')
                range_dict['label'] = str(round(range_start, 3))
                range_dict['count'] = solr_count
                range_dict['oc-api:min'] = range_start
                range_dict['oc-api:max'] = range_end
                field['oc-api:has-range-options'].append(range_dict)
            num_fields.append(field)
    if len(num_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-numeric-facets'] = num_fields
def add_date_fields(self, solr_json):
    """Add date-range facet fields (with query/filter links) to the output.

    Reads solr 'date' range facets from solr_json; each field gets
    min/max/gap date metadata plus one range option per solr bucket.
    Results go into self.json_ld['oc-api:has-date-facets'] when the
    'facet' response type is active.
    """
    date_fields = []
    date_facet_ranges = self.get_solr_ranges(solr_json, 'date')
    if date_facet_ranges is not False:
        for solr_field_key, ranges in date_facet_ranges.items():
            # solr field keys encode the slug before '___'
            facet_key_list = solr_field_key.split('___')
            slug = facet_key_list[0].replace('_', '-')
            # check to see if the field is a linked data field;
            # if so, it needs some help with making Filter Links
            linked_field = False
            field_entity = self.get_entity(slug)
            if field_entity is not False:
                self.add_active_facet_field(slug)
                if field_entity.item_type == 'uri':
                    linked_field = True
            field = self.get_facet_meta(solr_field_key)
            field['oc-api:min-date'] = ranges['start']
            field['oc-api:max-date'] = ranges['end']
            field['oc-api:gap-date'] = ranges['gap']
            field['oc-api:has-range-options'] = []
            # ranges['counts'] alternates [bucket_start, count, ...];
            # iterate starts with [::2], track count index with i
            i = -1
            qm = QueryMaker()
            for range_min_key in ranges['counts'][::2]:
                i += 2
                solr_count = ranges['counts'][i]
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                fl.partial_param_val_match = True
                # bucket end = bucket start + solr gap
                dt_end = qm.add_solr_gap_to_date(range_min_key, ranges['gap'])
                range_end = qm.convert_date_to_solr_date(dt_end)
                solr_range = '[' + range_min_key + ' TO ' + range_end + ' ]'
                new_rparams = fl.add_to_request('prop', solr_range, slug)
                range_dict = LastUpdatedOrderedDict()
                range_dict['id'] = fl.make_request_url(new_rparams)
                range_dict['json'] = fl.make_request_url(new_rparams, '.json')
                range_dict['label'] = qm.make_human_readable_date(range_min_key) + ' to ' + qm.make_human_readable_date(range_end)
                range_dict['count'] = solr_count
                range_dict['oc-api:min-date'] = range_min_key
                range_dict['oc-api:max-date'] = range_end
                field['oc-api:has-range-options'].append(range_dict)
            date_fields.append(field)
    if len(date_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-date-facets'] = date_fields
def add_numeric_fields(self, solr_json):
    """Add numeric-range facet fields (with query/filter links) to the output.

    Reads solr 'numeric' range facets from solr_json; each field gets
    min/max/gap metadata and one range option per solr bucket. Results
    go into self.json_ld['oc-api:has-numeric-facets'] when the 'facet'
    response type is active.
    """
    num_fields = []
    num_facet_ranges = self.get_solr_ranges(solr_json, 'numeric')
    if num_facet_ranges is not False:
        for solr_field_key, ranges in num_facet_ranges.items():
            # solr field keys encode the slug before '___'
            facet_key_list = solr_field_key.split('___')
            slug = facet_key_list[0].replace('_', '-')
            # check to see if the field is a linked data field;
            # if so, it needs some help with making Filter Links
            linked_field = False
            field_entity = self.get_entity(slug)
            if field_entity is not False:
                self.add_active_facet_field(slug)
                if field_entity.item_type == 'uri':
                    linked_field = True
            field = self.get_facet_meta(solr_field_key)
            field['oc-api:min'] = float(ranges['start'])
            field['oc-api:max'] = float(ranges['end'])
            gap = float(ranges['gap'])
            field['oc-api:gap'] = gap
            field['oc-api:has-range-options'] = []
            # ranges['counts'] alternates [bucket_start, count, ...];
            # iterate starts with [::2], track count index with i
            i = -1
            for range_min_key in ranges['counts'][::2]:
                i += 2
                solr_count = ranges['counts'][i]
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                fl.partial_param_val_match = True
                range_start = float(range_min_key)
                range_end = range_start + gap
                solr_range = '[' + str(range_start) + ' TO ' + str(range_end) + ' ]'
                new_rparams = fl.add_to_request('prop', solr_range, slug)
                range_dict = LastUpdatedOrderedDict()
                range_dict['id'] = fl.make_request_url(new_rparams)
                range_dict['json'] = fl.make_request_url(new_rparams, '.json')
                range_dict['label'] = str(round(range_start,3))
                range_dict['count'] = solr_count
                range_dict['oc-api:min'] = range_start
                range_dict['oc-api:max'] = range_end
                field['oc-api:has-range-options'].append(range_dict)
            num_fields.append(field)
    if len(num_fields) > 0 and 'facet' in self.act_responses:
        self.json_ld['oc-api:has-numeric-facets'] = num_fields
def make_facet_value_obj(self, solr_facet_key, solr_facet_value_key, solr_facet_count):
    """Make a facet-value dict (LastUpdatedOrderedDict) for a solr facet value.

    Facet values encoded as 'slug___data-type___/uri-item-type/uuid___label'
    (4 parts) are decoded here; anything else is delegated to
    make_specialized_facet_value_obj().
    """
    facet_key_list = solr_facet_value_key.split('___')
    if len(facet_key_list) == 4:
        # ----------------------------
        # Case where facet values are encoded as:
        # slug___data-type___/uri-item-type/uuid___label
        # ----------------------------
        data_type = facet_key_list[1]
        # a full URI in part [2] means this is linked data, not a
        # local Open Context item path
        if 'http://' in facet_key_list[2] or 'https://' in facet_key_list[
                2]:
            is_linked_data = True
        else:
            is_linked_data = False
        fl = FilterLinks()
        fl.base_search_link = self.base_search_link
        fl.base_request_json = self.request_dict_json
        fl.base_r_full_path = self.request_full_path
        fl.spatial_context = self.spatial_context
        fl.partial_param_val_match = is_linked_data  # allow partial matches of parameters.
        output = LastUpdatedOrderedDict()
        slug = facet_key_list[0]
        new_rparams = fl.add_to_request_by_solr_field(solr_facet_key,
                                                      slug)
        output['id'] = fl.make_request_url(new_rparams)
        output['json'] = fl.make_request_url(new_rparams, '.json')
        if is_linked_data:
            output['rdfs:isDefinedBy'] = facet_key_list[2]
        else:
            # local item path: prefix with the canonical host
            output[
                'rdfs:isDefinedBy'] = settings.CANONICAL_HOST + facet_key_list[
                2]
        output['label'] = facet_key_list[3]
        output['count'] = solr_facet_count
        output['slug'] = slug
        output['data-type'] = data_type
    else:
        # ----------------------------
        # Specialized cases of non-encoded facet values
        # ----------------------------
        output = self.make_specialized_facet_value_obj(
            solr_facet_key, solr_facet_value_key, solr_facet_count)
    return output
def make_paging_links(self, start, rows, ini_request_dict_json):
    """Make paging links for the given start offset and row count.

    Works from an initial request-dict JSON string rather than reusing
    one FilterLinks instance for everything — a bit of a hassle, but it
    avoids memory errors with FilterLinks().
    """
    # normalize start to the string form of an integer (raises
    # ValueError on non-numeric input, as before)
    start = str(int(start))
    rows = str(rows)
    links = FilterLinks()
    links.base_search_link = self.base_search_link
    links.base_request_json = ini_request_dict_json
    links.spatial_context = self.spatial_context
    links.remove_start_param = False
    # first fold the 'start' param into the request, then re-seed the
    # FilterLinks base with that result before adding 'rows'
    with_start = links.add_to_request('start', start)
    links.base_request_json = json.dumps(with_start,
                                         ensure_ascii=False,
                                         indent=4)
    with_rows = links.add_to_request('rows', rows)
    return links.make_request_urls(with_rows)
def process_solr_tiles(self, solr_tiles):
    """Process solr discovery geo tiles into GeoJSON region features.

    Aggregates tile counts to a configured depth, then emits one
    GeoJSON 'Feature' per aggregated tile into self.geojson_regions,
    each with filter links, an optional 'when' span, polygon geometry,
    and summary properties.
    """
    # first aggregate counts for tiles that belong together
    aggregate_tiles = self.aggregate_spatial_tiles(solr_tiles)
    # now generate GeoJSON for each tile region
    # print('Total tiles: ' + str(t) + ' reduced to ' + str(len(aggregate_tiles)))
    i = 0
    for tile_key, aggregate_count in aggregate_tiles.items():
        i += 1
        add_region = True
        fl = FilterLinks()
        fl.base_request_json = self.filter_request_dict_json
        fl.spatial_context = self.spatial_context
        new_rparams = fl.add_to_request('disc-geotile', tile_key)
        record = LastUpdatedOrderedDict()
        record['id'] = fl.make_request_url(new_rparams)
        record['json'] = fl.make_request_url(new_rparams, '.json')
        record['count'] = aggregate_count
        record['type'] = 'Feature'
        record['category'] = 'oc-api:geo-facet'
        # only add a 'when' span if both date bounds are known
        if self.min_date is not False \
                and self.max_date is not False:
            when = LastUpdatedOrderedDict()
            when['id'] = '#event-' + tile_key
            when['type'] = 'oc-gen:formation-use-life'
            # convert numeric to GeoJSON-LD ISO 8601
            when['start'] = ISOyears().make_iso_from_float(self.min_date)
            when['stop'] = ISOyears().make_iso_from_float(self.max_date)
            record['when'] = when
        gm = GlobalMercator()
        geo_coords = gm.quadtree_to_geojson_poly_coords(tile_key)
        geometry = LastUpdatedOrderedDict()
        geometry['id'] = '#geo-disc-tile-geom-' + tile_key
        geometry['type'] = 'Polygon'
        geometry['coordinates'] = geo_coords
        record['geometry'] = geometry
        properties = LastUpdatedOrderedDict()
        properties['id'] = '#geo-disc-tile-' + tile_key
        properties['href'] = record['id']
        properties['label'] = 'Discovery region (' + str(i) + ')'
        properties['feature-type'] = 'discovery region (facet)'
        properties['count'] = aggregate_count
        properties['early bce/ce'] = self.min_date
        properties['late bce/ce'] = self.max_date
        record['properties'] = properties
        if len(tile_key) >= 6:
            if tile_key[:6] == '211111':
                # no bad coordinates (off 0, 0 coast of Africa)
                add_region = False  # don't display items without coordinates
        if add_region:
            self.geojson_regions.append(record)
def make_facet_value_obj(self, solr_facet_key, solr_facet_value_key, solr_facet_count):
    """Make a facet-value dict (LastUpdatedOrderedDict) for a solr facet value.

    Facet values encoded as 'slug___data-type___/uri-item-type/uuid___label'
    (4 parts) are decoded here; anything else is delegated to
    make_specialized_facet_value_obj().
    """
    facet_key_list = solr_facet_value_key.split('___')
    if len(facet_key_list) == 4:
        # ----------------------------
        # Case where facet values are encoded as:
        # slug___data-type___/uri-item-type/uuid___label
        # ----------------------------
        data_type = facet_key_list[1]
        # a full URI in part [2] means linked data, not a local item path
        if 'http://' in facet_key_list[2] or 'https://' in facet_key_list[2]:
            is_linked_data = True
        else:
            is_linked_data = False
        fl = FilterLinks()
        fl.base_search_link = self.base_search_link
        fl.base_request_json = self.request_dict_json
        fl.base_r_full_path = self.request_full_path
        fl.spatial_context = self.spatial_context
        fl.partial_param_val_match = is_linked_data  # allow partial matches of parameters.
        output = LastUpdatedOrderedDict()
        slug = facet_key_list[0]
        new_rparams = fl.add_to_request_by_solr_field(solr_facet_key, slug)
        output['id'] = fl.make_request_url(new_rparams)
        output['json'] = fl.make_request_url(new_rparams, '.json')
        if is_linked_data:
            output['rdfs:isDefinedBy'] = facet_key_list[2]
        else:
            # local item path: prefix with the canonical host
            output['rdfs:isDefinedBy'] = settings.CANONICAL_HOST + facet_key_list[2]
        output['label'] = facet_key_list[3]
        output['count'] = solr_facet_count
        output['slug'] = slug
        output['data-type'] = data_type
    else:
        # ----------------------------
        # Specialized cases of non-encoded facet values
        # ----------------------------
        output = self.make_specialized_facet_value_obj(solr_facet_key,
                                                       solr_facet_value_key,
                                                       solr_facet_count)
    return output
def process_geo(self):
    """Process the project geo pivot facet into GeoJSON point features.

    For each project in self.geo_pivot, builds a 'Feature' record with
    filter links, an optional 'when' span from self.projects dates, a
    count-weighted mean point from the project's geo-tile pivot, and
    summary properties; appends it to self.geojson_projects. Projects
    without a 'pivot' entry are skipped.
    """
    if isinstance(self.geo_pivot, list):
        i = 0
        for proj in self.geo_pivot:
            i += 1
            add_feature = True
            # pivot value encodes slug___?___uri-part___label
            project_key = proj['value']
            proj_ex = project_key.split('___')
            slug = proj_ex[0]
            uri = self.make_url_from_val_string(proj_ex[2])
            href = self.make_url_from_val_string(proj_ex[2], False)
            label = proj_ex[3]
            fl = FilterLinks()
            fl.base_request_json = self.filter_request_dict_json
            fl.spatial_context = self.spatial_context
            new_rparams = fl.add_to_request('proj', slug)
            if 'response' in new_rparams:
                new_rparams.pop('response', None)
            record = LastUpdatedOrderedDict()
            record['id'] = fl.make_request_url(new_rparams)
            record['json'] = fl.make_request_url(new_rparams, '.json')
            record['count'] = proj['count']
            record['type'] = 'Feature'
            record['category'] = 'oc-api:geo-project'
            min_date = False
            max_date = False
            if project_key in self.projects:
                min_date = self.projects[project_key]['min_date']
                max_date = self.projects[project_key]['max_date']
            if min_date is not False \
                    and max_date is not False:
                when = LastUpdatedOrderedDict()
                when['id'] = '#event-' + slug
                when['type'] = 'oc-gen:formation-use-life'
                # convert numeric to GeoJSON-LD ISO 8601
                when['start'] = ISOyears().make_iso_from_float(
                    self.projects[project_key]['min_date'])
                when['stop'] = ISOyears().make_iso_from_float(
                    self.projects[project_key]['max_date'])
                record['when'] = when
            if 'pivot' not in proj:
                add_feature = False
            else:
                geometry = LastUpdatedOrderedDict()
                geometry['id'] = '#geo-geom-' + slug
                geometry['type'] = 'Point'
                # count-weighted centroid over the project's geo tiles
                pivot_count_total = 0
                total_lon = 0
                total_lat = 0
                for geo_data in proj['pivot']:
                    pivot_count_total += geo_data['count']
                    gm = GlobalMercator()
                    bounds = gm.quadtree_to_lat_lon(geo_data['value'])
                    mean_lon = (bounds[1] + bounds[3]) / 2
                    mean_lat = (bounds[0] + bounds[2]) / 2
                    total_lon += mean_lon * geo_data['count']
                    total_lat += mean_lat * geo_data['count']
                    # running weighted mean; final loop pass yields the
                    # value used below
                    weighted_mean_lon = total_lon / pivot_count_total
                    weighted_mean_lat = total_lat / pivot_count_total
                geometry['coordinates'] = [
                    weighted_mean_lon,
                    weighted_mean_lat
                ]
                record['geometry'] = geometry
                # now make a link to search records for this project
                fl = FilterLinks()
                fl.spatial_context = self.spatial_context
                new_rparams = fl.add_to_request('proj', slug)
                search_link = fl.make_request_url(new_rparams)
                # NOTE(review): properties are built in the 'pivot'
                # branch because search_link is only defined here —
                # confirm against the original (pre-reformat) layout
                properties = LastUpdatedOrderedDict()
                properties['id'] = '#geo-proj-' + slug
                properties['uri'] = uri
                properties['href'] = href
                properties['search'] = search_link
                properties['label'] = label
                properties['feature-type'] = 'project '
                properties['count'] = proj['count']
                properties['early bce/ce'] = min_date
                properties['late bce/ce'] = max_date
                record['properties'] = properties
            if add_feature:
                self.geojson_projects.append(record)
def process_solr_polygons(self, solr_polygons):
    """Process solr contained-in polygon facets into GeoJSON features.

    Only runs when the response zoom scope is at or above
    self.polygon_min_zoom_scope. For each facet uuid that has both a
    Subjects and a Geospatial model loaded (via
    get_polygon_db_objects), appends a polygon 'Feature' record to
    self.geojson_features.
    """
    if self.response_zoom_scope >= self.polygon_min_zoom_scope:
        # we're at a zoom level small enough to make it
        # worthwhile to return complex contained-in polygon features
        self.get_polygon_db_objects(solr_polygons)
        i = 0
        # solr_polygons alternates [facet_key, count, ...]; iterate
        # keys with [::2] and track the count index with cnt_i
        cnt_i = -1
        for poly_key in solr_polygons[::2]:
            cnt_i += 2
            solr_facet_count = solr_polygons[cnt_i]
            parsed_key = self.parse_solr_value_parts(poly_key)
            # print('Key: ' + str(parsed_key))
            uuid = parsed_key['uuid']
            if isinstance(uuid, str):
                if uuid in self.subjects_objs \
                        and uuid in self.geo_objs:
                    # we have Subjects and Geospatial models for this
                    # uuid
                    subj_obj = self.subjects_objs[uuid]
                    geo_obj = self.geo_objs[uuid]
                    i += 1
                    fl = FilterLinks()
                    fl.base_request_json = self.filter_request_dict_json
                    fl.spatial_context = self.spatial_context
                    new_rparams = fl.add_to_request(
                        'path', subj_obj.context)
                    record = LastUpdatedOrderedDict()
                    record['id'] = fl.make_request_url(new_rparams)
                    record['json'] = fl.make_request_url(
                        new_rparams, '.json')
                    record['count'] = solr_facet_count
                    record['type'] = 'Feature'
                    record['category'] = 'oc-api:geo-contained-in-feature'
                    # only add a 'when' span if both date bounds known
                    if self.min_date is not False \
                            and self.max_date is not False:
                        when = LastUpdatedOrderedDict()
                        when['id'] = '#event-feature-' + uuid
                        when['type'] = 'oc-gen:formation-use-life'
                        # convert numeric to GeoJSON-LD ISO 8601
                        when['start'] = ISOyears().make_iso_from_float(
                            self.min_date)
                        when['stop'] = ISOyears().make_iso_from_float(
                            self.max_date)
                        record['when'] = when
                    geometry = LastUpdatedOrderedDict()
                    geometry['id'] = '#geo-disc-feature-geom-' + uuid
                    geometry['type'] = geo_obj.ftype
                    coord_obj = json.loads(geo_obj.coordinates)
                    # normalize polygon ring winding per GeoJSON spec
                    v_geojson = ValidateGeoJson()
                    coord_obj = v_geojson.fix_geometry_rings_dir(
                        geo_obj.ftype, coord_obj)
                    geometry['coordinates'] = coord_obj
                    record['geometry'] = geometry
                    properties = LastUpdatedOrderedDict()
                    properties['id'] = '#geo-disc-feature-' + uuid
                    properties['href'] = record['id']
                    properties['item-href'] = parsed_key['href']
                    properties['label'] = subj_obj.context
                    properties['feature-type'] = 'containing-region'
                    properties['count'] = solr_facet_count
                    properties['early bce/ce'] = self.min_date
                    properties['late bce/ce'] = self.max_date
                    record['properties'] = properties
                    # NOTE(review): geojson_obj is never used; the
                    # round-trip may only serve to validate that the
                    # record is serializable — confirm before removing
                    dump = json.dumps(record,
                                      ensure_ascii=False,
                                      indent=4)
                    geojson_obj = geojson.loads(dump)
                    self.geojson_features.append(record)
def add_text_fields(self):
    """Add full-text search field options to the JSON-LD output.

    Adds (1) a general keyword-search option built from the 'q'
    request parameter, and (2) one option per string-typed 'prop'
    filter already in the request. Each option carries an
    'oc-api:template' URL with a '{SearchTerm}' placeholder. Results
    go into self.json_ld['oc-api:has-text-search'].
    """
    text_fields = []
    # first add a general key-word search option
    fl = FilterLinks()
    fl.base_search_link = self.base_search_link
    fl.base_request_json = self.request_dict_json
    fl.base_r_full_path = self.request_full_path
    fl.spatial_context = self.spatial_context
    q_request_dict = self.request_dict
    if 'q' not in q_request_dict:
        q_request_dict['q'] = []
    param_vals = q_request_dict['q']
    if len(param_vals) < 1:
        search_term = None
    else:
        search_term = param_vals[0]
    field = LastUpdatedOrderedDict()
    field['id'] = '#textfield-keyword-search'
    field['label'] = 'General Keyword Search'
    field['oc-api:search-term'] = search_term
    if search_term is False or search_term is None:
        # no current search term: template just appends the placeholder
        new_rparams = fl.add_to_request_by_solr_field('q', '{SearchTerm}')
        field['oc-api:template'] = fl.make_request_url(new_rparams)
        field['oc-api:template-json'] = fl.make_request_url(new_rparams,
                                                            '.json')
    else:
        # substitute the current search term with the placeholder
        # (a dead 'param_search' assignment was removed here; the
        # substitution is done by make_request_sub)
        rem_request = fl.make_request_sub(q_request_dict,
                                          'q',
                                          search_term,
                                          '{SearchTerm}')
        field['oc-api:template'] = fl.make_request_url(rem_request)
        field['oc-api:template-json'] = fl.make_request_url(rem_request,
                                                            '.json')
    text_fields.append(field)
    # now add an option looking in properties
    if 'prop' in self.request_dict:
        param_vals = self.request_dict['prop']
        for param_val in param_vals:
            if self.hierarchy_delim in param_val:
                all_vals = param_val.split(self.hierarchy_delim)
            else:
                all_vals = [param_val]
            if len(all_vals) < 2:
                check_field = all_vals[0]
                search_term = None  # no search term
            else:
                check_field = all_vals[-2]  # penultimate value is the field
                search_term = all_vals[-1]  # last value is search term
            check_dict = self.make_filter_label_dict(check_field)
            # only string-typed properties support text search
            if check_dict['data-type'] == 'string':
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                field = LastUpdatedOrderedDict()
                field['id'] = '#textfield-' + check_dict['slug']
                field['label'] = check_dict['label']
                field['oc-api:search-term'] = search_term
                if len(check_dict['entities']) == 1:
                    field['rdfs:isDefinedBy'] = check_dict['entities'][0].uri
                if search_term is False or search_term is None:
                    # append the placeholder as a new hierarchy level
                    param_search = param_val + self.hierarchy_delim + '{SearchTerm}'
                    new_rparams = fl.add_to_request_by_solr_field('prop',
                                                                  param_search)
                    field['oc-api:template'] = fl.make_request_url(new_rparams)
                    field['oc-api:template-json'] = fl.make_request_url(
                        new_rparams, '.json')
                else:
                    # swap the existing search term for the placeholder
                    param_search = param_val.replace(search_term,
                                                     '{SearchTerm}')
                    rem_request = fl.make_request_sub(self.request_dict,
                                                      'prop',
                                                      param_val,
                                                      param_search)
                    field['oc-api:template'] = fl.make_request_url(rem_request)
                    field['oc-api:template-json'] = fl.make_request_url(
                        rem_request, '.json')
                text_fields.append(field)
    if len(text_fields) > 0:
        self.json_ld['oc-api:has-text-search'] = text_fields
def process_solr_tiles(self, solr_tiles):
    """Process solr chronology tiles into chrono-facet records.

    Aggregates tile counts to self.aggregation_depth (skipping the
    'false' bucket and, if set, tiles outside self.limiting_tile),
    tracks the overall min/max date range, sorts tiles by start date
    then descending span, and appends one record per aggregated tile
    to self.chrono_tiles.
    """
    # first aggregate counts for tiles that belong together
    aggregate_tiles = LastUpdatedOrderedDict()
    # solr_tiles alternates [tile_key, count, ...]; iterate keys with
    # [::2] and track the count index with i
    i = -1
    t = 0
    if len(solr_tiles) <= 10:
        # don't aggregate if there's not much to aggregate
        self.aggregation_depth = self.max_depth
    for tile_key in solr_tiles[::2]:
        t += 1
        i += 2
        solr_facet_count = solr_tiles[i]
        if tile_key != 'false':
            if self.limiting_tile is False:
                ok_to_add = True
            else:
                # constrain to show facets ONLY within
                # the current queried tile
                if self.limiting_tile in tile_key:
                    ok_to_add = True
                else:
                    ok_to_add = False
            if ok_to_add:
                # first get full date range for
                # facets that are OK to add
                chrono_t = ChronoTile()
                dates = chrono_t.decode_path_dates(tile_key)
                if isinstance(dates, dict):
                    if self.min_date is False:
                        self.min_date = dates['earliest_bce']
                        self.max_date = dates['latest_bce']
                    else:
                        if self.min_date > dates['earliest_bce']:
                            self.min_date = dates['earliest_bce']
                        if self.max_date < dates['latest_bce']:
                            self.max_date = dates['latest_bce']
                # now aggregate the OK-to-use facets
                trim_tile_key = tile_key[:self.aggregation_depth]
                if trim_tile_key not in aggregate_tiles:
                    aggregate_tiles[trim_tile_key] = 0
                aggregate_tiles[trim_tile_key] += solr_facet_count
    # now generate a record for each aggregated tile
    # print('Chronology tiles: ' + str(t) + ' reduced to ' + str(len(aggregate_tiles)))
    # --------------------------------------------
    # code to sort the list of tiles by start date and time span
    # --------------------------------------------
    sorting_ranges = []
    for tile_key, aggregate_count in aggregate_tiles.items():
        chrono_t = ChronoTile()
        dates = chrono_t.decode_path_dates(tile_key)
        dates['tile_key'] = tile_key
        sorting_ranges.append(dates)
    # now sort by earliest bce, then reversed latest bce;
    # this puts early dates with longest timespans first
    sorted_ranges = sorted(sorting_ranges,
                           key=lambda k: (k['earliest_bce'],
                                          -k['latest_bce']))
    sorted_tiles = LastUpdatedOrderedDict()
    for sort_range in sorted_ranges:
        tile_key = sort_range['tile_key']
        sorted_tiles[tile_key] = aggregate_tiles[tile_key]
    i = 0
    for tile_key, aggregate_count in sorted_tiles.items():
        i += 1
        fl = FilterLinks()
        fl.base_request_json = self.filter_request_dict_json
        fl.spatial_context = self.spatial_context
        new_rparams = fl.add_to_request('form-chronotile', tile_key)
        record = LastUpdatedOrderedDict()
        record['id'] = fl.make_request_url(new_rparams)
        record['json'] = fl.make_request_url(new_rparams, '.json')
        record['count'] = aggregate_count
        record['category'] = 'oc-api:chrono-facet'
        chrono_t = ChronoTile()
        dates = chrono_t.decode_path_dates(tile_key)
        # convert numeric to GeoJSON-LD ISO 8601
        record['start'] = ISOyears().make_iso_from_float(
            dates['earliest_bce'])
        record['stop'] = ISOyears().make_iso_from_float(
            dates['latest_bce'])
        properties = LastUpdatedOrderedDict()
        properties['early bce/ce'] = dates['earliest_bce']
        properties['late bce/ce'] = dates['latest_bce']
        record['properties'] = properties
        self.chrono_tiles.append(record)
def process_solr_tiles(self, solr_tiles):
    """Process solr chronology tiles into chrono-facet records.

    Aggregates tile counts to self.aggregation_depth (skipping the
    'false' bucket and, if set, tiles outside self.limiting_tile),
    tracks the overall min/max date range, sorts tiles by start date
    then descending span, and appends one record per aggregated tile
    to self.chrono_tiles.
    """
    # first aggregate counts for tiles that belong together
    aggregate_tiles = LastUpdatedOrderedDict()
    # solr_tiles alternates [tile_key, count, ...]; iterate keys with
    # [::2] and track the count index with i
    i = -1
    t = 0
    if len(solr_tiles) <= 10:
        # don't aggregate if there's not much to aggregate
        self.aggregation_depth = self.max_depth
    for tile_key in solr_tiles[::2]:
        t += 1
        i += 2
        solr_facet_count = solr_tiles[i]
        if tile_key != 'false':
            if self.limiting_tile is False:
                ok_to_add = True
            else:
                # constrain to show facets ONLY within
                # the current queried tile
                if self.limiting_tile in tile_key:
                    ok_to_add = True
                else:
                    ok_to_add = False
            if ok_to_add:
                # first get full date range for
                # facets that are OK to add
                chrono_t = ChronoTile()
                dates = chrono_t.decode_path_dates(tile_key)
                if isinstance(dates, dict):
                    if self.min_date is False:
                        self.min_date = dates['earliest_bce']
                        self.max_date = dates['latest_bce']
                    else:
                        if self.min_date > dates['earliest_bce']:
                            self.min_date = dates['earliest_bce']
                        if self.max_date < dates['latest_bce']:
                            self.max_date = dates['latest_bce']
                # now aggregate the OK-to-use facets
                trim_tile_key = tile_key[:self.aggregation_depth]
                if trim_tile_key not in aggregate_tiles:
                    aggregate_tiles[trim_tile_key] = 0
                aggregate_tiles[trim_tile_key] += solr_facet_count
    # now generate a record for each aggregated tile
    # print('Chronology tiles: ' + str(t) + ' reduced to ' + str(len(aggregate_tiles)))
    # --------------------------------------------
    # code to sort the list of tiles by start date and time span
    # --------------------------------------------
    sorting_ranges = []
    for tile_key, aggregate_count in aggregate_tiles.items():
        chrono_t = ChronoTile()
        dates = chrono_t.decode_path_dates(tile_key)
        dates['tile_key'] = tile_key
        sorting_ranges.append(dates)
    # now sort by earliest bce, then reversed latest bce;
    # this puts early dates with longest timespans first
    sorted_ranges = sorted(sorting_ranges,
                           key=lambda k: (k['earliest_bce'],
                                          -k['latest_bce']))
    sorted_tiles = LastUpdatedOrderedDict()
    for sort_range in sorted_ranges:
        tile_key = sort_range['tile_key']
        sorted_tiles[tile_key] = aggregate_tiles[tile_key]
    i = 0
    for tile_key, aggregate_count in sorted_tiles.items():
        i += 1
        fl = FilterLinks()
        fl.base_request_json = self.filter_request_dict_json
        fl.spatial_context = self.spatial_context
        new_rparams = fl.add_to_request('form-chronotile', tile_key)
        record = LastUpdatedOrderedDict()
        record['id'] = fl.make_request_url(new_rparams)
        record['json'] = fl.make_request_url(new_rparams, '.json')
        record['count'] = aggregate_count
        record['category'] = 'oc-api:chrono-facet'
        chrono_t = ChronoTile()
        dates = chrono_t.decode_path_dates(tile_key)
        # convert numeric to GeoJSON-LD ISO 8601
        record['start'] = ISOyears().make_iso_from_float(dates['earliest_bce'])
        record['stop'] = ISOyears().make_iso_from_float(dates['latest_bce'])
        properties = LastUpdatedOrderedDict()
        properties['early bce/ce'] = dates['earliest_bce']
        properties['late bce/ce'] = dates['latest_bce']
        record['properties'] = properties
        self.chrono_tiles.append(record)
def process_geo(self):
    """ Processes the solr geo pivot facets into one GeoJSON
        'Feature' record per project, appending each to
        self.geojson_projects.

        Each feature gets a Point geometry at the count-weighted
        mean of its discovery-tile centroids, a 'when' span from
        self.projects (if min/max dates are known), and links to
        search within the project.

        Fix: an empty (but present) 'pivot' list previously caused a
        ZeroDivisionError when computing the weighted mean; it is now
        treated the same as a missing pivot (no feature emitted).
    """
    if isinstance(self.geo_pivot, list):
        i = 0
        for proj in self.geo_pivot:
            i += 1
            add_feature = True
            # pivot value is a '___'-delimited string; positions used
            # here: [0] slug, [2] uri-part, [3] label
            # (presumably slug___data-type___uri___label — confirm
            # against the solr field encoding)
            project_key = proj['value']
            proj_ex = project_key.split('___')
            slug = proj_ex[0]
            uri = self.make_url_from_val_string(proj_ex[2])
            href = self.make_url_from_val_string(proj_ex[2], False)
            label = proj_ex[3]
            fl = FilterLinks()
            fl.base_request_json = self.filter_request_dict_json
            fl.spatial_context = self.spatial_context
            new_rparams = fl.add_to_request('proj', slug)
            if 'response' in new_rparams:
                # the 'response' param should not carry into facet links
                new_rparams.pop('response', None)
            record = LastUpdatedOrderedDict()
            record['id'] = fl.make_request_url(new_rparams)
            record['json'] = fl.make_request_url(new_rparams, '.json')
            record['count'] = proj['count']
            record['type'] = 'Feature'
            record['category'] = 'oc-api:geo-project'
            min_date = False
            max_date = False
            if project_key in self.projects:
                min_date = self.projects[project_key]['min_date']
                max_date = self.projects[project_key]['max_date']
            if min_date is not False \
               and max_date is not False:
                when = LastUpdatedOrderedDict()
                when['id'] = '#event-' + slug
                when['type'] = 'oc-gen:formation-use-life'
                # convert numeric to GeoJSON-LD ISO 8601
                when['start'] = ISOyears().make_iso_from_float(self.projects[project_key]['min_date'])
                when['stop'] = ISOyears().make_iso_from_float(self.projects[project_key]['max_date'])
                record['when'] = when
            if not proj.get('pivot'):
                # missing OR empty pivot list: no geo data, so no
                # feature; this also guards the divisions below
                add_feature = False
            else:
                geometry = LastUpdatedOrderedDict()
                geometry['id'] = '#geo-geom-' + slug
                geometry['type'] = 'Point'
                pivot_count_total = 0
                total_lon = 0
                total_lat = 0
                for geo_data in proj['pivot']:
                    pivot_count_total += geo_data['count']
                    gm = GlobalMercator()
                    bounds = gm.quadtree_to_lat_lon(geo_data['value'])
                    # assumes bounds order is (lat, lon, lat, lon) so
                    # [1]/[3] are longitudes and [0]/[2] latitudes —
                    # TODO confirm against GlobalMercator
                    mean_lon = (bounds[1] + bounds[3]) / 2
                    mean_lat = (bounds[0] + bounds[2]) / 2
                    # weight each tile centroid by its record count
                    total_lon += mean_lon * geo_data['count']
                    total_lat += mean_lat * geo_data['count']
                weighted_mean_lon = total_lon / pivot_count_total
                weighted_mean_lat = total_lat / pivot_count_total
                geometry['coordinates'] = [weighted_mean_lon,
                                           weighted_mean_lat]
                record['geometry'] = geometry
            # now make a link to search records for this project
            fl = FilterLinks()
            fl.spatial_context = self.spatial_context
            new_rparams = fl.add_to_request('proj', slug)
            search_link = fl.make_request_url(new_rparams)
            properties = LastUpdatedOrderedDict()
            properties['id'] = '#geo-proj-' + slug
            properties['uri'] = uri
            properties['href'] = href
            properties['search'] = search_link
            properties['label'] = label
            # NOTE(review): trailing space in 'project ' kept as-is;
            # downstream consumers may match this exact string
            properties['feature-type'] = 'project '
            properties['count'] = proj['count']
            properties['early bce/ce'] = min_date
            properties['late bce/ce'] = max_date
            record['properties'] = properties
            if add_feature:
                self.geojson_projects.append(record)
def add_text_fields(self):
    """ Adds text search fields, each with templated query URLs,
        to self.json_ld under 'oc-api:has-text-search'.

        Always adds the general keyword ('q') search field; then adds
        one field per 'prop' filter whose data-type is 'string'.
        Templates embed the literal placeholder '{SearchTerm}'.

        Fix: removed a dead local ('param_search' computed from
        param_vals[0] in the keyword branch was never used).

        Side effect: ensures self.request_dict has a 'q' key (an
        empty list when absent).
    """
    text_fields = []
    # first add a general key-word search option
    fl = FilterLinks()
    fl.base_search_link = self.base_search_link
    fl.base_request_json = self.request_dict_json
    fl.base_r_full_path = self.request_full_path
    fl.spatial_context = self.spatial_context
    q_request_dict = self.request_dict
    if 'q' not in q_request_dict:
        q_request_dict['q'] = []
    param_vals = q_request_dict['q']
    if len(param_vals) < 1:
        search_term = None
    else:
        search_term = param_vals[0]
    field = LastUpdatedOrderedDict()
    field['id'] = '#textfield-keyword-search'
    field['label'] = 'General Keyword Search'
    field['oc-api:search-term'] = search_term
    if search_term is False or search_term is None:
        # no active search term: template adds a new 'q' parameter
        new_rparams = fl.add_to_request_by_solr_field('q',
                                                      '{SearchTerm}')
        field['oc-api:template'] = fl.make_request_url(new_rparams)
        field['oc-api:template-json'] = fl.make_request_url(new_rparams,
                                                            '.json')
    else:
        # active search term: template substitutes it in the request
        rem_request = fl.make_request_sub(q_request_dict,
                                          'q',
                                          search_term,
                                          '{SearchTerm}')
        field['oc-api:template'] = fl.make_request_url(rem_request)
        field['oc-api:template-json'] = fl.make_request_url(rem_request,
                                                            '.json')
    text_fields.append(field)
    # now add an option looking in properties
    if 'prop' in self.request_dict:
        param_vals = self.request_dict['prop']
        for param_val in param_vals:
            if self.hierarchy_delim in param_val:
                all_vals = param_val.split(self.hierarchy_delim)
            else:
                all_vals = [param_val]
            if len(all_vals) < 2:
                check_field = all_vals[0]
                search_term = None  # no search term
            else:
                check_field = all_vals[-2]  # penultimate value is the field
                search_term = all_vals[-1]  # last value is search term
            check_dict = self.make_filter_label_dict(check_field)
            if check_dict['data-type'] == 'string':
                # only string-typed properties get a text-search field
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = self.request_dict_json
                fl.base_r_full_path = self.request_full_path
                fl.spatial_context = self.spatial_context
                field = LastUpdatedOrderedDict()
                field['id'] = '#textfield-' + check_dict['slug']
                field['label'] = check_dict['label']
                field['oc-api:search-term'] = search_term
                if len(check_dict['entities']) == 1:
                    field['rdfs:isDefinedBy'] = check_dict['entities'][0].uri
                if search_term is False or search_term is None:
                    # append the placeholder as a deeper hierarchy level
                    param_search = param_val + self.hierarchy_delim + '{SearchTerm}'
                    new_rparams = fl.add_to_request_by_solr_field('prop',
                                                                  param_search)
                    field['oc-api:template'] = fl.make_request_url(new_rparams)
                    field['oc-api:template-json'] = fl.make_request_url(new_rparams,
                                                                        '.json')
                else:
                    # swap the existing term for the placeholder
                    param_search = param_val.replace(search_term,
                                                     '{SearchTerm}')
                    rem_request = fl.make_request_sub(self.request_dict,
                                                      'prop',
                                                      param_val,
                                                      param_search)
                    field['oc-api:template'] = fl.make_request_url(rem_request)
                    field['oc-api:template-json'] = fl.make_request_url(rem_request,
                                                                        '.json')
                text_fields.append(field)
    if len(text_fields) > 0:
        self.json_ld['oc-api:has-text-search'] = text_fields
def add_filters_json(self, request_dict): """ adds JSON describing search filters """ fl = FilterLinks() fl.base_search_link = self.base_search_link filters = [] string_fields = [] # so we have an interface for string searches i = 0 for param_key, param_vals in request_dict.items(): if param_key == 'path': if param_vals: i += 1 f_entity = self.m_cache.get_entity(param_vals) label = http.urlunquote_plus(param_vals) act_filter = LastUpdatedOrderedDict() act_filter['id'] = '#filter-' + str(i) act_filter['oc-api:filter'] = 'Context' act_filter['label'] = label.replace('||', ' OR ') if f_entity: act_filter['rdfs:isDefinedBy'] = f_entity.uri # generate a request dict without the context filter rem_request = fl.make_request_sub(request_dict, param_key, param_vals) act_filter['oc-api:remove'] = fl.make_request_url(rem_request) act_filter['oc-api:remove-json'] = fl.make_request_url(rem_request, '.json') filters.append(act_filter) else: for param_val in param_vals: i += 1 remove_geodeep = False act_filter = LastUpdatedOrderedDict() act_filter['id'] = '#filter-' + str(i) if self.hierarchy_delim in param_val: all_vals = param_val.split(self.hierarchy_delim) else: all_vals = [param_val] if param_key == 'proj': # projects, only care about the last item in the parameter value act_filter['oc-api:filter'] = 'Project' label_dict = self.make_filter_label_dict(all_vals[-1]) act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri elif param_key == 'prop': # prop, the first item is the filter-label # the last is the filter act_filter['label'] = False if len(all_vals) < 2: act_filter['oc-api:filter'] = 'Description' act_filter['oc-api:filter-slug'] = all_vals[0] else: filt_dict = self.make_filter_label_dict(all_vals[0]) act_filter['oc-api:filter'] = filt_dict['label'] if 'slug' in filt_dict: act_filter['oc-api:filter-slug'] = filt_dict['slug'] if filt_dict['data-type'] == 'string': act_filter['label'] = 
'Search Term: \'' + all_vals[-1] + '\'' if act_filter['label'] is False: label_dict = self.make_filter_label_dict(all_vals[-1]) act_filter['label'] = label_dict['label'] elif param_key == 'type': act_filter['oc-api:filter'] = 'Open Context Type' if all_vals[0] in QueryMaker.TYPE_MAPPINGS: type_uri = QueryMaker.TYPE_MAPPINGS[all_vals[0]] label_dict = self.make_filter_label_dict(type_uri) act_filter['label'] = label_dict['label'] else: act_filter['label'] = all_vals[0] elif param_key == 'q': act_filter['oc-api:filter'] = self.TEXT_SEARCH_TITLE act_filter['label'] = 'Search Term: \'' + all_vals[0] + '\'' elif param_key == 'id': act_filter['oc-api:filter'] = 'Identifier Lookup' act_filter['label'] = 'Identifier: \'' + all_vals[0] + '\'' elif param_key == 'form-chronotile': act_filter['oc-api:filter'] = 'Time of formation, use, or life' chrono = ChronoTile() dates = chrono.decode_path_dates(all_vals[0]) if isinstance(dates, dict): act_filter['label'] = 'Time range: ' + str(dates['earliest_bce']) act_filter['label'] += ' to ' + str(dates['latest_bce']) elif param_key == 'form-start': act_filter['oc-api:filter'] = 'Earliest formation, use, or life date' try: val_date = int(float(all_vals[0])) except: val_date = False if val_date is False: act_filter['label'] = '[Invalid year]' elif val_date < 0: act_filter['label'] = str(val_date * -1) + ' BCE' else: act_filter['label'] = str(val_date) + ' CE' elif param_key == 'form-stop': act_filter['oc-api:filter'] = 'Latest formation, use, or life date' try: val_date = int(float(all_vals[0])) except: val_date = False if val_date is False: act_filter['label'] = '[Invalid year]' elif val_date < 0: act_filter['label'] = str(val_date * -1) + ' BCE' else: act_filter['label'] = str(val_date) + ' CE' elif param_key == 'disc-geotile': act_filter['oc-api:filter'] = 'Location of discovery or observation' act_filter['label'] = self.make_geotile_filter_label(all_vals[0]) remove_geodeep = True elif param_key == 'disc-bbox': 
act_filter['oc-api:filter'] = 'Location of discovery or observation' act_filter['label'] = self.make_bbox_filter_label(all_vals[0]) remove_geodeep = True elif param_key == 'images': act_filter['oc-api:filter'] = 'Has related media' act_filter['label'] = 'Linked to images' elif param_key == 'other-media': act_filter['oc-api:filter'] = 'Has related media' act_filter['label'] = 'Linked to media (other than images)' elif param_key == 'documents': act_filter['oc-api:filter'] = 'Has related media' act_filter['label'] = 'Linked to documents' elif param_key == 'dc-subject': act_filter['oc-api:filter'] = 'Has subject metadata' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if 'tdar' == all_vals[-1] or 'tdar*' == all_vals[-1]: act_filter['label'] = 'tDAR defined metadata record(s)' if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 'dc-spatial': act_filter['oc-api:filter'] = 'Has spatial metadata' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 'dc-coverage': act_filter['oc-api:filter'] = 'Has coverage / period metadata' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 
'dc-temporal': act_filter['oc-api:filter'] = 'Has temporal coverage' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: if label_dict['entities'][0].entity_type == 'vocabulary': act_filter['label'] = 'Concepts defined by: ' + label_dict['label'] elif 'periodo' in all_vals[-1]: act_filter['label'] = 'PeriodO defined concepts' if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False\ and label_dict['entities'][0].vocabulary != label_dict['label']: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 'obj': act_filter['oc-api:filter'] = 'Links (in some manner) to object' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 'dc-isReferencedBy': act_filter['oc-api:filter'] = 'Is referenced by' label_dict = self.make_filter_label_dict(all_vals[-1]) if len(label_dict['label']) > 0: act_filter['label'] = label_dict['label'] if len(label_dict['entities']) == 1: act_filter['rdfs:isDefinedBy'] = label_dict['entities'][0].uri if label_dict['entities'][0].vocabulary is not False\ and label_dict['entities'][0].vocab_uri != label_dict['entities'][0].uri: act_filter['label'] += ' in ' + label_dict['entities'][0].vocabulary elif param_key == 'linked' and all_vals[-1] == 'dinaa-cross-ref': act_filter['oc-api:filter'] = 'Has cross references' act_filter['label'] = 'Links to, or with, DINAA curated site files' else: act_filter = False if act_filter is not False: rem_request = fl.make_request_sub(request_dict, param_key, param_val) 
if 'geodeep' in rem_request and remove_geodeep: rem_request.pop('geodeep', None) act_filter['oc-api:remove'] = fl.make_request_url(rem_request) act_filter['oc-api:remove-json'] = fl.make_request_url(rem_request, '.json') filters.append(act_filter) return filters
def make_sort_links_list(self, request_dict):
    """ Builds link objects for the available sort options and
        appends them to self.sort_links.

        Options already present in self.current_sorting are skipped;
        the default (value-less) sort option is linked only when the
        default sorting is not currently in use. Note: this mutates
        request_dict by removing any 'sort' parameter.
    """
    request_dict.pop('sort', None)  # strip any active sort param
    # request_dict is not changed below, so serialize it once
    base_json = json.dumps(request_dict,
                           ensure_ascii=False,
                           indent=4)
    orderings = [('asc', 'ascending'), ('desc', 'descending')]
    for sort_opt in self.SORT_OPTIONS:
        if not sort_opt['opt']:
            # only make sort_options if the 'opt' key is true
            continue
        if sort_opt['value'] is None:
            # the default sorting; only add a link to it if
            # we are not currently using it
            if self.using_default_sorting is False:
                fl = FilterLinks()
                fl.base_search_link = self.base_search_link
                fl.base_request_json = base_json
                fl.spatial_context = self.spatial_context
                links = fl.make_request_urls(request_dict)
                sort_obj = LastUpdatedOrderedDict()
                sort_obj['id'] = links['html']
                sort_obj['json'] = links['json']
                sort_obj['type'] = sort_opt['type']
                sort_obj['label'] = sort_opt['label']
                sort_obj['oc-api:sort-order'] = 'descending'
                self.sort_links.append(sort_obj)
            continue
        for order_key, order_word in orderings:
            fl = FilterLinks()
            fl.base_search_link = self.base_search_link
            fl.base_request_json = base_json
            fl.spatial_context = self.spatial_context
            sort_rparams = fl.add_to_request(
                'sort',
                sort_opt['value'] + self.order_sep + order_key)
            links = fl.make_request_urls(sort_rparams)
            # skip sort options that are already in active use
            already_active = any(
                sort_opt['type'] == active['type']
                and order_word == active['oc-api:sort-order']
                for active in self.current_sorting)
            if already_active:
                continue
            sort_obj = LastUpdatedOrderedDict()
            sort_obj['id'] = links['html']
            sort_obj['json'] = links['json']
            sort_obj['type'] = sort_opt['type']
            sort_obj['label'] = sort_opt['label']
            sort_obj['oc-api:sort-order'] = order_word
            self.sort_links.append(sort_obj)