def setUp(self):
    """ Create the test app and seed a background fixture.

    All existing entities are deleted, then one entity that belongs to a
    private project is added to the session. """
    self.app = make_test_app()
    Entity.all().delete()

    # Consistently include an extra private project with an Entity that
    # should not show up in any test results.
    private_project, _permission = _project_and_permission(private=True)
    db.session.add(
        Entity(project=private_project, status=authz.PUBLISHED_THRESHOLD)
    )
def save(self):
    """ Save the entity to the database. Do this only once, after all
    properties have been set.

    A query over the loader project's entities is narrowed by every
    ``(name, only_active)`` pair in ``self.update_criteria``; the first
    match (or ``None``) is passed to ``entities.save`` as ``entity``.

    Validation errors (``Invalid``) are logged as a warning and swallowed;
    in that case ``self._entity`` is not assigned by this call. """
    project = self.loader.project

    # fetch existing:
    q = Entity.all()
    q = q.filter(Entity.project == project)
    for name, only_active in self.update_criteria:
        # NOTE(review): every criterion name must be present in
        # self.properties, otherwise .get(name) returns None and the
        # chained .get('value') raises AttributeError.
        value = self.properties.get(name).get('value')
        attr = project.get_attribute('entity', name)
        q = Entity._filter_property(q, [attr], value,
                                    only_active=only_active)
    entity = q.first()

    data = {
        'project': project,
        'author': self.loader.account,
        'schemata': self.schemata,
        'properties': self.properties
    }
    try:
        self._entity = entities.save(data, entity=entity)
    except Invalid as inv:
        # "except ... as" is valid on Python 2.6+ and Python 3 alike.
        log.warning("Validation error: %r", inv)
def rebuild():
    """ Re-run the change processing handlers for every project and, per
    project, for its schemata, its entities (those with ``same_as`` unset)
    and its relations. Each handler is invoked with 'delete' and then with
    'create'. This can be used as a housekeeping function. """
    operations = ('delete', 'create')

    for project in Project.all():
        for operation in operations:
            _project_changed(project.slug, operation)

        for schema in project.schemata:
            for operation in operations:
                _schema_changed(schema.project.slug, schema.name, operation)

        entity_q = Entity.all().filter_by(same_as=None)
        entity_q = entity_q.filter_by(project=project)
        for count, entity in enumerate(entity_q):
            # Progress report every 1000 entities (never for the first).
            if count and not count % 1000:
                log.info("Rebuilt: %s entities", count)
            for operation in operations:
                _entity_changed(entity.id, operation)

        relation_q = Relation.all().filter_by(project=project)
        for count, relation in enumerate(relation_q):
            if count and not count % 1000:
                log.info("Rebuilt: %s relation", count)
            for operation in operations:
                _relation_changed(relation.id, operation)
def save(self):
    """ Save the entity to the database. Do this only once, after all
    properties have been set.

    A query over the loader project's entities is narrowed by every
    ``(name, only_active)`` pair in ``self.update_criteria``; the first
    match (or ``None``) is passed to ``entities.save`` as ``entity``.

    Raises ``Invalid`` when validation fails, unless
    ``self.loader.ignore_errors`` is set; then the error is only logged as
    a warning and ``self._entity`` is not assigned by this call. """
    project = self.loader.project

    # fetch existing:
    q = Entity.all()
    q = q.filter(Entity.project == project)
    for name, only_active in self.update_criteria:
        # NOTE(review): every criterion name must be present in
        # self.properties, otherwise .get(name) returns None and the
        # chained .get('value') raises AttributeError.
        value = self.properties.get(name).get('value')
        attr = project.get_attribute('entity', name)
        q = Entity._filter_property(q, [attr], value,
                                    only_active=only_active)
    entity = q.first()

    data = {
        'project': project,
        'author': self.loader.account,
        'schemata': self.schemata,
        'properties': self.properties
    }
    try:
        self._entity = entities.save(data, entity=entity)
    except Invalid as inv:
        # "except ... as" is valid on Python 2.6+ and Python 3 alike.
        if not self.loader.ignore_errors:
            raise
        log.warning("Validation error: %r", inv.asdict())
def index():
    """ Return a paginated JSON listing of entities.

    The base query comes from ``filter_query`` applied to the request
    arguments. Every ``schema`` request argument is split on commas and
    adds a join against its own ``Schema`` alias, restricted to the given
    names. """
    query = filter_query(Entity, Entity.all(), request.args)

    for schema_arg in request.args.getlist('schema'):
        schema_alias = aliased(Schema)
        names = schema_arg.split(',')
        query = query.join(schema_alias, Entity.schemata) \
                     .filter(schema_alias.name.in_(names))

    def convert(results):
        # Convert each entity on a page to its REST index form.
        return [entities.to_rest_index(result) for result in results]

    return jsonify(Pager(query).to_dict(convert))
def export_aliases(project, path):
    """ Dump a list of all entity names of ``project`` to the CSV file at
    ``path``. Per the original description, the table contains the active
    name of each entity and one of the other existing names as an alias;
    the rows themselves are written by ``export_entity``. Only entities
    with ``same_as`` unset are exported. """
    columns = ['entity_id', 'alias', 'canonical', 'schemata']
    with open(path, 'w') as fh:
        writer = DictWriter(fh, columns)
        writer.writeheader()

        primaries = Entity.all().filter_by(same_as=None) \
                          .filter(Entity.project == project)
        for count, entity in enumerate(primaries):
            export_entity(entity, writer)
            # Progress report on every 100th entity, starting with the
            # very first one (count == 0).
            if not count % 100:
                log.info("Dumped %s entity names...", count)
def export_aliases(project, path):
    """ Write the entity names of ``project`` to a CSV file at ``path``.
    Per the original description, each row carries the active name of an
    entity and one of its other existing names as an alias; the rows are
    produced by ``export_entity``. Entities that have ``same_as`` set are
    left out. """
    fieldnames = ['entity_id', 'alias', 'canonical', 'schemata']
    with open(path, 'w') as fh:
        writer = DictWriter(fh, fieldnames)
        writer.writeheader()

        query = Entity.all().filter_by(same_as=None)
        query = query.filter(Entity.project == project)
        for index, entity in enumerate(query):
            export_entity(entity, writer)
            if index % 100:
                continue
            # Reached on index 0, 100, 200, ...
            log.info("Dumped %s entity names...", index)
def index_project(self, project=None):
    """ Index every entity of ``project``, or of the entire database if no
    project is given. Entities with ``same_as`` set are skipped. The search
    index is refreshed after every 1000 entities and once more at the
    end. """
    entity_q = Entity.all().filter_by(same_as=None)
    if project is not None:
        entity_q = entity_q.filter(Entity.project == project)

    def refresh():
        es.indices.refresh(index=es_index)

    for count, entity in enumerate(entity_q):
        self.index_entity(entity)
        if count and not count % 1000:
            log.info("Indexed: %s entities", count)
            refresh()

    refresh()
def rebuild():
    """ Run the change processing handlers for all entities (those with
    ``same_as`` unset) and all relations currently loaded. This can be
    used as a housekeeping function. """
    entity_q = Entity.all().filter_by(same_as=None)
    for count, entity in enumerate(entity_q):
        # Progress report every 1000 items (never for the first one).
        if count and not count % 1000:
            log.info("Rebuilt: %s entities", count)
        _entity_changed(entity.id)

    relation_q = Relation.all()
    for count, relation in enumerate(relation_q):
        if count and not count % 1000:
            log.info("Rebuilt: %s relation", count)
        _relation_changed(relation.id)
def index_entities():
    """ Re-build an index for all entities from scratch.

    Every entity with ``same_as`` unset is converted with
    ``entities.to_index`` and sent to the search index. Entities whose
    index body has no ``name`` key are skipped with a warning. The index
    is refreshed after every 1000 entities and once more at the end. """
    for i, entity in enumerate(Entity.all().filter_by(same_as=None)):
        body = entities.to_index(entity)
        if 'name' not in body:
            # log.warning rather than the deprecated log.warn alias.
            log.warning('No name: %s, skipping!', entity.id)
            continue
        # The id becomes the document id and is removed from the body
        # before the body is sent.
        entity_id = body.pop('id')
        es.index(index=es_index, doc_type='entity', id=entity_id, body=body)
        if i > 0 and i % 1000 == 0:
            log.info("Indexed: %s entities", i)
            es.indices.refresh(index=es_index)
    es.indices.refresh(index=es_index)
def index_entities():
    """ Re-build an index for all entities from scratch.

    Every entity with ``same_as`` unset is converted with
    ``entities.to_index`` and sent to the search index. Entities whose
    index body has no ``name`` entry under ``properties`` are skipped with
    a warning. The index is refreshed after every 1000 entities and once
    more at the end. """
    for i, entity in enumerate(Entity.all().filter_by(same_as=None)):
        body = entities.to_index(entity)
        if 'name' not in body.get('properties', {}):
            # log.warning rather than the deprecated log.warn alias.
            log.warning('No name: %s, skipping!', entity.id)
            continue
        # The id becomes the document id and is removed from the body
        # before the body is sent.
        entity_id = body.pop('id')
        es.index(index=es_index, doc_type='entity', id=entity_id, body=body)
        if i > 0 and i % 1000 == 0:
            log.info("Indexed: %s entities", i)
            es.indices.refresh(index=es_index)
    es.indices.refresh(index=es_index)
def generate_sitemap(count=40000):
    """ Generate a static sitemap.xml for the most central entities in the
    database.

    All entities are loaded, ranked by ``degree`` (highest first) and cut
    to the top ``count``. Each entry gets a priority of
    ``degree ** 0.3 / highest degree``, floored at 0.3 and formatted with
    two decimals. The rendered ``sitemap.xml`` template is written into
    the app's static folder.

    An empty database yields a sitemap with no entries, and a highest
    degree of 0 gives every entry the floor priority; both cases used to
    raise (``ValueError`` from ``max`` and ``ZeroDivisionError``).

    :param count: maximum number of entities to include. """
    pattern = app.config.get('ENTITY_VIEW_PATTERN')
    entries = []
    for i, entity in enumerate(Entity.all().yield_per(5000)):
        dt = entity.updated_at.strftime('%Y-%m-%d')
        entries.append((pattern % entity.id, dt, entity.degree))
        if i > 0 and i % 1000 == 0:
            log.info("Loaded %s entities...", i)

    # max() raises ValueError on an empty sequence, so guard it.
    upper = max([e[2] for e in entries]) if entries else 0
    entries = sorted(entries, key=lambda e: e[2], reverse=True)[:count]

    def priority(degree):
        # Avoid dividing by zero when no entity has any degree at all.
        if upper == 0:
            return 0.3
        return max(0.3, (float(degree) ** 0.3) / upper)

    entries = [(url, dt, '%.2f' % priority(degree))
               for (url, dt, degree) in entries]

    xml = render_template('sitemap.xml', entities=entries)
    with open(os.path.join(app.static_folder, 'sitemap.xml'), 'w') as fh:
        fh.write(xml)
def index():
    """ Return a paginated JSON listing of entities.

    Starts from ``filter_query`` applied to the request arguments. A
    non-blank ``q`` argument adds a case-insensitive substring match on
    the ``name`` property; each non-blank ``schema`` argument
    (comma-separated names) adds a join against its own ``Schema`` alias.
    Entities with ``same_as`` set are excluded and the result is made
    distinct. """
    query = filter_query(Entity, Entity.all(), request.args)

    term = request.args.get('q', '').strip()
    if term:
        pattern = '%%%s%%' % term
        query = query.join(EntityProperty) \
                     .filter(EntityProperty.name == 'name') \
                     .filter(EntityProperty.value_string.ilike(pattern))

    for schema_arg in request.args.getlist('schema'):
        if not schema_arg.strip():
            continue
        schema_alias = aliased(Schema)
        names = schema_arg.split(',')
        query = query.join(schema_alias, Entity.schemata) \
                     .filter(schema_alias.name.in_(names))

    # This is a column comparison that builds a filter expression; a
    # Python identity test ("is None") would not do that.
    query = query.filter(Entity.same_as == None).distinct()  # noqa

    pager = Pager(query)
    validate_cache(keys=pager.cache_keys())
    return jsonify(pager, index=True)
def generate_sitemap(count=40000):
    """ Generate a static sitemap.xml for the most central entities in the
    database.

    All entities are loaded, ranked by ``degree`` (highest first) and cut
    to the top ``count``. Each entry gets a priority of
    ``degree ** 0.3 / highest degree``, floored at 0.3 and formatted with
    two decimals. The rendered ``sitemap.xml`` template is written into
    the app's static folder.

    An empty database yields a sitemap with no entries, and a highest
    degree of 0 gives every entry the floor priority; both cases used to
    raise (``ValueError`` from ``max`` and ``ZeroDivisionError``).

    :param count: maximum number of entities to include. """
    pattern = app.config.get('ENTITY_VIEW_PATTERN')
    entries = []
    for i, entity in enumerate(Entity.all().yield_per(5000)):
        dt = entity.updated_at.strftime('%Y-%m-%d')
        entries.append((pattern % entity.id, dt, entity.degree))
        if i > 0 and i % 1000 == 0:
            log.info("Loaded %s entities...", i)

    # max() raises ValueError on an empty sequence, so guard it.
    upper = max([e[2] for e in entries]) if entries else 0
    entries = sorted(entries, key=lambda e: e[2], reverse=True)[:count]

    def priority(degree):
        # Avoid dividing by zero when no entity has any degree at all.
        if upper == 0:
            return 0.3
        return max(0.3, (float(degree) ** 0.3) / upper)

    entries = [(url, dt, '%.2f' % priority(degree))
               for (url, dt, degree) in entries]

    xml = render_template('sitemap.xml', entities=entries)
    with open(os.path.join(app.static_folder, 'sitemap.xml'), 'w') as fh:
        fh.write(xml)
def save(self):
    """ Save the entity to the database. Do this only once, after all
    properties have been set.

    A query over the loader project's entities is narrowed by every
    ``(name, only_active)`` pair in ``self.update_criteria``; the first
    match (or ``None``) is passed to ``entities.save`` as ``entity``.

    Raises ``Invalid`` when validation fails, unless
    ``self.loader.ignore_errors`` is set; then the error is only logged as
    a warning and ``self._entity`` is not assigned by this call. """
    project = self.loader.project

    # fetch existing:
    q = Entity.all()
    q = q.filter(Entity.project == project)
    for name, only_active in self.update_criteria:
        # NOTE(review): every criterion name must be present in
        # self.properties, otherwise .get(name) returns None and the
        # chained .get('value') raises AttributeError.
        v = self.properties.get(name).get('value')
        q = Entity._filter_property(q, name, v, only_active=only_active)
    entity = q.first()

    data = {
        'project': project,
        'author': self.loader.account,
        'schema': self.schema,
        'properties': self.properties
    }
    try:
        self._entity = entities.save(data, entity=entity)
    except Invalid as inv:
        # "except ... as" is valid on Python 2.6+ and Python 3 alike.
        if not self.loader.ignore_errors:
            raise
        log.warning("Validation error: %r", inv.asdict())