def transient(self, data):
    """Decorate a serialized entity with request-scoped fields.

    Adds API ($uri) and UI ($ui) locations plus a $writeable flag derived
    from the current request's authorization context.
    """
    entity_id = data.get('id')
    data['$uri'] = url_for('entities_api.view', id=entity_id)
    data['$ui'] = entity_url(entity_id)
    # Bulk-loaded entities are never editable through the API.
    if not data.get('$bulk'):
        data['$writeable'] = request.authz.can_write(data.get('collection_id'))
    else:
        data['$writeable'] = False
    return data
def entity_links(self, data, pk, schemata):
    """Return the hyperlink map for a serialized entity keyed by relation."""
    links = {
        'self': url_for('entities_api.view', id=pk),
        'references': url_for('entities_api.references', id=pk),
        'tags': url_for('entities_api.tags', id=pk),
        'ui': entity_url(pk),
    }
    return links
def reconcile_index():
    """Return the OpenRefine reconciliation service metadata document.

    Describes the view/preview URL templates, suggest endpoints and default
    types so OpenRefine clients can discover the service capabilities.
    """
    domain = app_ui_url.strip('/')
    api_key = request.authz.role.api_key if request.authz.logged_in else None
    # Fix: only append the api_key query parameter when one exists; the old
    # string interpolation produced a literal '?api_key=None' for anonymous
    # users. (url_for below already drops None-valued parameters itself.)
    preview_url = entity_url('{{id}}')
    if api_key is not None:
        preview_url = preview_url + '?api_key=%s' % api_key
    meta = {
        'name': settings.APP_TITLE,
        'identifierSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'schemaSpace': 'http://rdf.freebase.com/ns/type.object.id',
        'view': {
            'url': entity_url('{{id}}')
        },
        'preview': {
            'url': preview_url,
            'width': 800,
            'height': 400
        },
        'suggest': {
            'entity': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_entity',
                                        api_key=api_key)
            },
            'type': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_type')
            },
            'property': {
                'service_url': domain,
                'service_path': url_for('reconcile_api.suggest_property')
            }
        },
        'defaultTypes': [{
            # 'Thing' is the top-level schema; labelled for OpenRefine users.
            'id': 'Entity',
            'name': 'Persons and Companies'
        }]
    }
    return jsonify(meta)
def reconcile_op(query):
    """Reconcile operation for a single query."""
    parser = SearchQueryParser({
        'limit': query.get('limit', '5'),
        'strict': 'false'
    }, request.authz)
    name = query.get('query', '')
    schema = query.get('type') or 'Thing'
    # Build a synthetic entity from the incoming query to match against.
    entity = {
        'id': 'fake',
        'names': [name],
        'fingerprints': [fingerprints.generate(name)],
        'schemata': ensure_list(schema),
        'schema': schema
    }
    for prop in query.get('properties', []):
        entity[prop.get('pid')] = ensure_list(prop.get('v'))

    # Renamed from the original parameter shadow: keep the request dict and
    # the similarity query as distinct names.
    similar = SimilarEntitiesQuery(parser, entity=entity)
    matches = []
    hits = similar.search().get('hits').get('hits')
    for hit in hits:
        source = hit.get('_source')
        match = {
            'id': hit.get('_id'),
            'name': source.get('name'),
            # Scale the ES relevance score into a 0-100 band.
            'score': min(100, hit.get('_score') * 10),
            'uri': entity_url(hit.get('_id')),
            'match': source.get('name') == name
        }
        for type_ in get_freebase_types():
            if source['schema'] == type_['id']:
                match['type'] = [type_]
        matches.append(match)
    log.info("Reconciled: %r -> %d matches", name, len(matches))
    return {
        'result': matches,
        'num': len(matches)
    }
def generate_matches_sheet(workbook, sheet, collection, match_collection,
                           authz, links=True, one_sheet=False, offset=0,
                           limit=1000):
    """Write one page of cross-reference matches into an XLSX worksheet.

    Lays out a two-row header (written only when ``offset < 3``, i.e. on the
    first page of a sheet), then one row per match pairing the entity from
    ``collection`` (columns 0-4) with its candidate from ``match_collection``
    (columns 5-8). Returns the sheet for chaining.

    NOTE(review): the caller is expected to pass ``offset`` pointing at the
    first free data row (>= 2 once headers exist) — with the default of 0
    data rows would overwrite the header; confirm against the call site.
    """
    from aleph.views.serializers import MatchSchema
    if one_sheet:
        sheet_label = "All matches (top %s per collection)" % limit
    else:
        sheet_label = "%s (top %s)" % (match_collection.label, limit)

    sheet.set_zoom(125)
    parser = QueryParser({}, authz, limit=limit)
    q_match = Match.find_by_collection(collection.id, match_collection.id)
    matches = MatchQueryResult({}, q_match, parser=parser, schema=MatchSchema)
    # Header block: row 0 carries the merged collection titles, row 1 the
    # per-column captions. Only emitted on the first page.
    if offset < 3:
        sheet.write(0, 0, '', workbook.header_format)
        sheet.write(1, 0, 'Score', workbook.header_format)
        sheet.merge_range(0, 1, 0, 4, collection.label, workbook.header_format)
        sheet.write(1, 1, 'Name', workbook.header_format)
        sheet.write(1, 2, 'Type', workbook.header_format)
        sheet.write(1, 3, 'Country', workbook.header_format)
        sheet.write(1, 4, 'Source URL', workbook.header_format)
        sheet.merge_range(0, 5, 0, 8, sheet_label, workbook.header_format)
        sheet.write(1, 5, 'Name', workbook.header_format)
        sheet.write(1, 6, 'Type', workbook.header_format)
        sheet.write(1, 7, 'Country', workbook.header_format)
        if one_sheet:
            sheet.write(1, 8, 'Collection', workbook.header_format)
        sheet.freeze_panes(2, 0)
        sheet.autofilter(1, 1, 2 + len(matches.results), 8)
    # Track the widest cell per column so columns can be sized at the end.
    widths = {}
    for row, result in enumerate(matches.results, offset):
        sheet.write_number(row, 0, int(result.score))
        # Left-hand side: the entity from the source collection.
        name = result.entity.get('name')
        widths[1] = max(widths.get(1, 0), len(name))
        if links:
            url = entity_url(result.entity_id)
            sheet.write_url(row, 1, url, workbook.link_format, name)
        else:
            sheet.write_string(row, 1, name)
        schema = model.get(result.entity['schema'])
        sheet.write_string(row, 2, schema.label)
        countries = ', '.join(sorted(result.entity.get('countries', [])))
        sheet.write_string(row, 3, countries.upper())
        ent_props = result.entity.get('properties', {})
        # sourceUrl is a multi-valued property; join into one cell.
        if (ent_props.get('sourceUrl') is not None):
            source_url = ', '.join(ent_props.get('sourceUrl'))
        else:
            source_url = ''
        sheet.write_string(row, 4, source_url)
        # Right-hand side: the matched candidate entity.
        name = result.match.get('name')
        widths[5] = max(widths.get(5, 0), len(name))
        if links:
            url = entity_url(result.match_id)
            sheet.write_url(row, 5, url, workbook.link_format, name)
        else:
            sheet.write_string(row, 5, name)
        schema = model.get(result.match['schema'])
        sheet.write_string(row, 6, schema.label)
        countries = ', '.join(sorted(result.match.get('countries', [])))
        sheet.write_string(row, 7, countries.upper())
        if one_sheet:
            sheet.write_string(row, 8, match_collection.label)
    # Size name columns to their content, clamped to a sane 7..70 range.
    for idx, max_len in widths.items():
        max_len = min(70, max(7, max_len + 1))
        sheet.set_column(idx, idx, float(max_len))
    return sheet