def create_entity(self, ctx, type_, **kwargs):
    entity = Entity(assume_contexts=[ctx.id])
    entity.set(types.Object.attributes.type, type_, ctx)
    for attr in type_.attributes:
        if attr.name in kwargs:
            entity.set(attr, kwargs.get(attr.name), ctx)
    return entity
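# Hypothetical usage sketch for create_entity() above. The registry object,
# the 'Person' type and its 'name' attribute are invented for illustration;
# only the method signature comes from the source.
#
#   entity = registry.create_entity(ctx, types.Person, name='John Doe')
#   assert entity is not None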
def view(dataset, entity):
    dataset = Dataset.find(dataset)
    entity = Entity.find(dataset, entity)
    print entity.data
    format = response_format()
    if format == 'json':
        return jsonify(entity)
    query = request.args.get('query', '').strip().lower()
    choices = match_op(entity.name, dataset)
    choices = filter(lambda (c, e, s): e != entity.id, choices)
    if len(query):
        choices = filter(lambda (c, e, s): query in
                         Entity.find(dataset, e).name.lower(), choices)
    # THIS is very inefficient - rather do this
    # differently
    pager = Pager(choices, '.view', dataset=dataset.name,
                  entity=entity.id, limit=10)
    # HACK: Fetch only the entities on the selected page.
    entities = Entity.id_map(dataset, map(lambda (c, v, s): v,
                             pager.query[pager.offset:pager.offset + pager.limit]))
    for i, (c, e, s) in enumerate(pager.query):
        if e in entities:
            pager.query[i] = (c, entities.get(e), s)
    return render_template('entity/view.html', dataset=dataset,
                           entity=entity, entities=pager, query=query)
def to_dict(self):
    from nomenklatura.model.entity import Entity
    num_aliases = Entity.all(self).filter(
        Entity.canonical_id != None).count()
    num_review = Entity.all(self).filter_by(reviewed=False).count()
    num_entities = Entity.all(self).count()
    num_invalid = Entity.all(self).filter_by(invalid=True).count()
    return {
        'id': self.id,
        'name': self.name,
        'label': self.label,
        'owner': self.owner.to_dict(),
        'stats': {
            'num_aliases': num_aliases,
            'num_entities': num_entities,
            'num_review': num_review,
            'num_invalid': num_invalid
        },
        'ignore_case': self.ignore_case,
        'match_aliases': self.match_aliases,
        'public_edit': self.public_edit,
        'normalize_text': self.normalize_text,
        'enable_invalid': self.enable_invalid,
        'created_at': self.created_at,
        'updated_at': self.updated_at
    }
def import_upload(dataset_name, id, account_id, entity_col, alias_col):
    dataset = Dataset.find(dataset_name)
    account = Account.by_id(account_id)
    metadata, row_set = parse_upload(dataset, id)
    headers = detect_headers(row_set)
    for row in row_set:
        data = dict([(c.column, c.value) for c in row])
        entity = data.pop(entity_col) if entity_col else None
        alias = data.pop(alias_col) if alias_col else None
        if alias_col and alias is not None and len(alias) and alias != entity:
            d = {'name': alias, 'data': data}
            alias_obj = Alias.lookup(dataset, d, account, match_entity=False)
            data = {}
        if entity_col and entity is not None and len(entity):
            d = {'name': entity, 'data': data}
            entity_obj = Entity.by_name(dataset, entity)
            if entity_obj is None:
                entity_obj = Entity.create(dataset, d, account)
            entity_obj.data = data
        if alias_col and entity_col:
            alias_obj.match(dataset, {'choice': entity_obj.id}, account)
    db.session.commit()
    flush_cache(dataset)
def _to_python(self, value, state):
    if isinstance(value, dict):
        value = value.get('id')
    entity = Entity.by_id(value)
    if entity is None:
        entity = Entity.by_name(state.dataset, value)
    if entity is None:
        raise Invalid('Entity does not exist: %s' % value,
                      value, None)
    if entity == state.entity:
        return None
    if entity.dataset != state.dataset:
        raise Invalid('Entity belongs to a different dataset.',
                      value, None)
    return entity
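# Hedged sketch of how this FormEncode-style _to_python() is typically
# driven: the validator receives either an id string or a {'id': ...} dict,
# plus a state object carrying .dataset and .entity. 'EntityValidator' and
# 'state' are assumptions, not names confirmed by the source.
#
#   canonical = EntityValidator().to_python({'id': raw_id}, state)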
def suggest(dataset):
    """ Suggest API, emulates Google Refine API. See:
    http://code.google.com/p/google-refine/wiki/SuggestApi """
    try:
        start = int(request.args.get('start', 0))
        limit = int(request.args.get('limit', 20))
    except (ValueError, TypeError):
        # Narrowed from a bare except, which also swallowed unrelated errors.
        raise BadRequest('Invalid result range!')
    dataset = type_to_dataset(dataset)
    query = request.args.get('prefix', '').strip()
    results = prefix_search(query, dataset)[start:start + limit]
    entities = Entity.id_map(dataset, map(lambda (c, v): v, results))
    matches = []
    for candidate, entity_id in results:
        entity = entities[entity_id]
        matches.append({
            'name': entity.name,
            'n:type': {
                'id': '/' + dataset.name,
                'name': dataset.label
            },
            'id': url_for('entity.view', dataset=dataset.name,
                          entity=entity_id)
        })
    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": query,
        "result": matches
    })
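# Illustrative response body for the suggest endpoint above, matching the
# dict it passes to jsonify(); the entity name and URL are invented.
SUGGEST_RESPONSE_EXAMPLE = {
    "code": "/api/status/ok",
    "status": "200 OK",
    "prefix": "acm",
    "result": [{
        "name": "ACME Inc.",
        "n:type": {"id": "/companies", "name": "Companies"},
        "id": "/companies/entities/42"
    }]
}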
def suggest(dataset):
    """ Suggest API, emulates Google Refine API. See:
    http://code.google.com/p/google-refine/wiki/SuggestApi """
    dataset = Dataset.by_name(dataset)
    entities = Entity.all().filter(Entity.invalid != True)
    query = request.args.get('prefix', '').strip()
    entities = entities.filter(Entity.name.ilike('%s%%' % query))
    entities = entities.offset(get_offset(field='start'))
    entities = entities.limit(get_limit(default=20))
    matches = []
    for entity in entities:
        matches.append({
            'name': entity.name,
            'n:type': {
                'id': '/' + dataset.name,
                'name': dataset.label
            },
            'id': entity.id
        })
    return jsonify({
        "code": "/api/status/ok",
        "status": "200 OK",
        "prefix": query,
        "result": matches
    })
def create():
    data = request_data()
    dataset = Dataset.from_form(data)
    authz.require(authz.dataset_edit(dataset))
    entity = Entity.create(dataset, data, request.account)
    db.session.commit()
    return redirect(url_for('.view', id=entity.id))
def review(dataset):
    entities = Entity.all()
    dataset = Dataset.find(dataset)
    entities = entities.filter_by(dataset=dataset)
    entities = entities.filter(Entity.reviewed == False)
    review_count = entities.count()
    # Guard added: randint(0, -1) raises ValueError when no unreviewed
    # entities remain.
    if review_count == 0:
        return jsonify(None)
    entities = entities.offset(randint(0, review_count - 1))
    return jsonify(entities.first())
def match(dataset, alias, random=False):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    alias = Alias.find(dataset, alias)
    random = random or request.args.get('random') == 'True'
    choices = match_op(alias.name, dataset,
                       query=request.args.get('query'))
    pager = Pager(choices, '.match', dataset=dataset.name,
                  alias=alias.id, limit=10)
    # HACK: Fetch only the entities on the selected page.
    entities = Entity.id_map(dataset, map(lambda (c, e, s): e,
                             pager.query[pager.offset:pager.offset + pager.limit]))
    for i, (c, e, s) in enumerate(pager.query):
        if e in entities:
            pager.query[i] = (c, entities.get(e), s)
    html = render_template('alias/match.html', dataset=dataset,
                           alias=alias, choices=pager, random=random)
    choice = 'INVALID' if alias.is_invalid else alias.entity_id
    if len(choices) and choice is None:
        c, e, s = choices[0]
        choice = 'INVALID' if s <= 50 else e.id
    return htmlfill.render(html, force_defaults=False,
                           defaults={'choice': choice,
                                     'name': alias.name,
                                     'query': request.args.get('query', ''),
                                     'random': random})
def __iter__(self):
    rp = db.engine.execute(self.lq)
    rows = rp.fetchall()
    ids = [r[0] for r in rows]
    entities = Entity.id_map(ids)
    for (id, score) in rows:
        yield {'score': int(score),
               'entity': entities.get(id)}
def _to_python(self, name, state):
    entity = Entity.by_name(state.dataset, name)
    if entity is None:
        return name
    if state.entity and entity.id == state.entity.id:
        return name
    raise Invalid('Entity already exists.', name, None)
def index(dataset, format='json'):
    dataset = Dataset.find(dataset)
    q = Entity.all(dataset, eager=True)
    if format == 'csv':
        fn = csv_filename(dataset, 'entities')
        headers = {'Content-Disposition': 'attachment; filename=' + fn}
        return csvify(q, headers=headers)
    return jsonify(q)
def _get_candidates(dataset):
    for entity in Entity.all(dataset, eager_aliases=dataset.match_aliases):
        candidate = normalize(entity.name, dataset)
        yield candidate, entity.id
        if dataset.match_aliases:
            for link in entity.aliases_static:
                candidate = normalize(link.name, dataset)
                yield candidate, entity.id
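# Minimal, self-contained sketch of the (candidate, entity_id) pairs that
# _get_candidates() yields. The normalization shown here (lowercase,
# collapsed whitespace) is an assumption; the real normalize() is
# dataset-aware.
def _normalize_sketch(name):
    return ' '.join(name.lower().split())

sample = [('ACME  Inc.', 'e1'), ('Acme Incorporated', 'e1')]
candidates = [(_normalize_sketch(name), eid) for name, eid in sample]
# -> [('acme inc.', 'e1'), ('acme incorporated', 'e1')]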
def review(dataset):
    entities = Entity.all()
    dataset = Dataset.find(dataset)
    entities = entities.filter_by(dataset=dataset)
    entities = entities.filter(Entity.reviewed == False)  # noqa
    review_count = entities.count()
    if review_count == 0:
        return jsonify(None)
    entities = entities.offset(randint(0, review_count - 1))
    return jsonify(entities.first())
def import_upload(upload_id, account_id, mapping):
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()
    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # put aliases second.
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)
    for i, row in enumerate(rows):
        try:
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)
            # restore some defaults:
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid
            if entity.attributes:
                attributes = entity.attributes.copy()
            else:
                attributes = {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes
            entity.update(row, account)
            print(entity)
            if i % 100 == 0:
                db.session.commit()
                logging.debug('Commit')
        except Invalid as inv:
            logging.warning('Exception during import: {}'.format(str(inv)))
    db.session.commit()
    logging.info('Import Completed')
def import_upload(upload_id, account_id, mapping):
    upload = Upload.all().filter_by(id=upload_id).first()
    account = Account.by_id(account_id)
    mapped = mapping['columns'].values()
    rows = [apply_mapping(r, mapping) for r in upload.tab.dict]
    # put aliases second.
    rows = sorted(rows, key=lambda r: 2 if r.get('canonical') else 1)
    for i, row in enumerate(rows):
        try:
            entity = None
            if row.get('id'):
                entity = Entity.by_id(row.get('id'))
            if entity is None:
                entity = Entity.by_name(upload.dataset, row.get('name'))
            if entity is None:
                entity = Entity.create(upload.dataset, row, account)
            # restore some defaults:
            if entity.canonical_id and 'canonical' not in mapped:
                row['canonical'] = entity.canonical_id
            if entity.invalid and 'invalid' not in mapped:
                row['invalid'] = entity.invalid
            if entity.attributes:
                attributes = entity.attributes.copy()
            else:
                attributes = {}
            attributes.update(row['attributes'])
            row['attributes'] = attributes
            entity.update(row, account)
            print entity
            if i % 100 == 0:
                print 'COMMIT'
                db.session.commit()
        except Invalid as inv:
            # TODO: logging.
            print inv
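# Hypothetical shape of the 'mapping' argument to import_upload() above,
# inferred from the code: mapping['columns'] maps source spreadsheet columns
# to row fields, and apply_mapping() is expected to emit rows carrying 'id',
# 'name', 'canonical', 'invalid' and 'attributes' keys. The concrete column
# names are invented for illustration.
MAPPING_EXAMPLE = {
    'columns': {
        'company': 'name',
        'canonical_name': 'canonical',
    }
}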
def create(dataset):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    data = request_content()
    try:
        entity = Entity.create(dataset, data, request.account)
        db.session.commit()
        return redirect(url_for('.view', dataset=dataset.name,
                                entity=entity.id))
    except Invalid as inv:
        return handle_invalid(inv, view_dataset, data=data,
                              args=[dataset.name])
def update(dataset, entity):
    dataset = Dataset.find(dataset)
    authz.require(authz.dataset_edit(dataset))
    entity = Entity.find(dataset, entity)
    data = request_content()
    try:
        entity.update(data, request.account)
        db.session.commit()
        flash("Updated %s" % entity.display_name, 'success')
        return redirect(url_for('.view', dataset=dataset.name,
                                entity=entity.id))
    except Invalid as inv:
        return handle_invalid(inv, view, data=data,
                              args=[dataset.name, entity.id])
def import_upload(dataset_name, sig, account_id, entity_col, alias_col):
    dataset = Dataset.find(dataset_name)
    account = Account.by_id(account_id)
    metadata, row_set = parse_upload(dataset, sig)
    headers = detect_headers(row_set)
    for row in row_set:
        data = dict([(c.column, c.value) for c in row])
        entity = data.pop(entity_col) if entity_col else None
        alias = data.pop(alias_col) if alias_col else None
        if alias_col and alias is not None and len(alias) and alias != entity:
            d = {'name': alias, 'data': data}
            alias_obj = Alias.lookup(dataset, d, account, match_entity=False)
            data = {}
        if entity_col and entity is not None and len(entity):
            d = {'name': entity, 'data': data}
            entity_obj = Entity.by_name(dataset, entity)
            if entity_obj is None:
                entity_obj = Entity.create(dataset, d, account)
            entity_obj.data = data
        if alias_col and entity_col:
            alias_obj.match(dataset, {'choice': entity_obj.id}, account)
    db.session.commit()
    flush_cache()
def load_entity(context, mapping, record):
    type_ = types.get(mapping.get('type'))
    if type_ is None:
        log.warning("No type defined for entity in mapping: %r", mapping)
        return
    query = {'assume': [context.id], 'type': unicode(type_)}
    has_key = False
    data = [(types.Object.attributes.type, type_)]
    for attr in type_.attributes:
        if attr.name not in mapping or attr == types.Object.attributes.type:
            continue
        attr_map = mapping[attr.name]
        if attr.data_type == 'entity':
            value = load_entity(context, attr_map, record)
        else:
            value = record.get(attr_map.get('field'))
        if attr_map.get('key'):
            has_key = True
            query[attr.name] = value
        data.append((attr, value))
    query = EntityQuery(query)
    entity = query.first() if has_key else None
    if entity is None:
        entity = Entity()
    for (attr, value) in data:
        entity.set(attr, value, context)
    db.session.commit()
    # log.info("Loaded entity: %r", entity)
    return entity
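# Hypothetical mapping for load_entity() above, inferred from the code: one
# entry per attribute naming the source record 'field', with 'key' marking
# the attributes used to look up an existing entity before creating a new
# one. The 'Person' type and the field names are invented for illustration.
PERSON_MAPPING_EXAMPLE = {
    'type': 'Person',
    'name': {'field': 'person_name', 'key': True},
    'birth_date': {'field': 'dob'},
}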
def view(dataset):
    dataset = Dataset.find(dataset)
    format = response_format()
    if format == 'json':
        return jsonify(dataset)
    unmatched = Alias.all_unmatched(dataset).count()
    entities = Entity.all(dataset, query=request.args.get('query'))
    pager = Pager(entities, '.view', dataset=dataset.name, limit=10)
    return render_template('dataset/view.html', entities=pager,
                           num_entities=len(pager),
                           num_aliases=Alias.all(dataset).count(),
                           invalid=Alias.all_invalid(dataset).count(),
                           query=request.args.get('query', ''),
                           dataset=dataset, unmatched=unmatched)
def reconcile_op(dataset, query):
    try:
        limit = max(1, min(100, int(query.get('limit'))))
    except (ValueError, TypeError):
        limit = 5
    filters = [(p.get('p'), p.get('v')) for p in query.get('properties', [])]
    if dataset is None:
        dataset = type_to_dataset(query.get('type', ''))
    results = match(query.get('query', ''), dataset)[:limit]
    entities = Entity.id_map(dataset, map(lambda (c, e, s): e, results))
    matches = []
    for (candidate, entity_id, score) in results:
        entity = entities[entity_id]
        # Reset the filter flag per result; the original set it once before
        # the loop, so a single mismatch skipped all later results too.
        skip = False
        for key, fv in filters:
            if entity.data.get(key) != fv:
                skip = True
        if skip:
            continue
        id = url_for('entity.view', dataset=dataset.name, entity=entity.id)
        uri = url_for('entity.view', dataset=dataset.name, entity=entity.id,
                      _external=True)
        matches.append({
            'name': entity.name,
            'score': score,
            'type': [{
                'id': '/' + dataset.name,
                'name': dataset.label
            }],
            'id': id,
            'uri': uri,
            'match': score == 100
        })
    return {
        'result': matches,
        'num': len(results)
    }
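# Example reconciliation query in the shape reconcile_op() expects, per the
# Google Refine reconciliation API it emulates; all values are illustrative.
RECONCILE_QUERY_EXAMPLE = {
    'query': 'acme widgets',
    'limit': 5,
    'type': '/companies',
    'properties': [{'p': 'country', 'v': 'de'}],
}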
def view(dataset):
    dataset = Dataset.find(dataset)
    format = response_format()
    headers = {
        'X-Dataset': dataset.name,
        'Last-Modified': http_date(dataset.last_modified)
    }
    if format == 'json':
        return jsonify(dataset, headers=headers)
    unmatched = Alias.all_unmatched(dataset).count()
    entities = Entity.all(dataset, query=request.args.get('query'))
    pager = Pager(entities, '.view', dataset=dataset.name, limit=10)
    html = render_template('dataset/view.html', entities=pager,
                           num_entities=len(pager),
                           num_aliases=Alias.all(dataset).count(),
                           invalid=Alias.all_invalid(dataset).count(),
                           query=request.args.get('query', ''),
                           dataset=dataset, unmatched=unmatched)
    return Response(html, headers=headers)
def index():
    entities = Entity.all()
    dataset = None  # avoid a NameError when downloading without ?dataset=
    dataset_arg = request.args.get('dataset')
    if dataset_arg is not None:
        dataset = Dataset.find(dataset_arg)
        entities = entities.filter_by(dataset=dataset)
    filter_name = request.args.get('filter_name', '')
    if len(filter_name):
        query = '%' + filter_name + '%'
        entities = entities.filter(Entity.name.ilike(query))
    # TODO, other filters.
    format = request.args.get('format', 'json').lower().strip()
    if format == 'csv':
        res = csvify(entities)
    else:
        res = query_pager(entities)
    if arg_bool('download'):
        fn = dataset_filename(dataset, format)
        res.headers['Content-Disposition'] = 'attachment; filename=' + fn
    return res
def index():
    entities = Entity.all()
    dataset = None  # avoid a NameError when downloading without ?dataset=
    dataset_arg = request.args.get("dataset")
    if dataset_arg is not None:
        dataset = Dataset.find(dataset_arg)
        entities = entities.filter_by(dataset=dataset)
    filter_name = request.args.get("filter_name", "")
    if len(filter_name):
        query = "%" + filter_name + "%"
        entities = entities.filter(Entity.name.ilike(query))
    # TODO, other filters.
    format = request.args.get("format", "json").lower().strip()
    if format == "csv":
        res = csvify(entities)
    else:
        pager = Pager(entities)
        res = jsonify(pager.to_dict())
    if arg_bool("download"):
        fn = dataset_filename(dataset, format)
        res.headers["Content-Disposition"] = "attachment; filename=" + fn
    return res
def aliases(id):
    entity = Entity.by_id(id)
    return query_pager(entity.aliases, id=id)
def aliases(id):
    entity = Entity.by_id(id)
    pager = Pager(entity.aliases, id=id)
    return jsonify(pager.to_dict())
def view_by_name(dataset):
    dataset = Dataset.find(dataset)
    entity = Entity.by_name(dataset, request.args.get('name'))
    if entity is None:
        raise NotFound("No such entity: %s" % request.args.get('name'))
    return view(dataset.name, entity.id)
def view(id):
    entity = Entity.by_id(id)
    return jsonify(entity)
def create():
    authz.require(authz.system_edit())
    context = Context.create(current_user, {})
    entity = Entity.create(request_data(), context)
    db.session.commit()
    return redirect(url_for(".view", id=entity.id))
def view(id):
    entity = object_or_404(Entity.by_id(id))
    return jsonify(entity)
def by_name(dataset):
    dataset = Dataset.find(dataset)
    name = request.args.get('name')
    entity = object_or_404(Entity.by_name(dataset, name))
    return jsonify(entity)
def update(id):
    entity = Entity.by_id(id)
    authz.require(authz.dataset_edit(entity.dataset))
    entity.update(request_data(), request.account)
    db.session.commit()
    return redirect(url_for('.view', id=entity.id))