def update(id):
    """Apply the request payload to a watchlist and return its view.

    Write access is checked through ``authz`` before the watchlist is
    looked up by ``id``; the lookup result is passed through
    ``obj_or_404``. The change is committed and the response is whatever
    ``view(id)`` produces.
    """
    authz.require(authz.watchlist_write(id))
    target = obj_or_404(Watchlist.by_id(id))

    payload = request_data()
    target.update(payload)

    session = db.session
    session.add(target)
    session.commit()
    return view(id)
def delete(id):
    """Delete a watchlist and return a JSON ``{'status': 'ok'}`` body.

    Write access is checked through ``authz`` first. The watchlist's
    terms are handed to ``analyze_terms.delay`` before the watchlist is
    deleted and the session committed.
    """
    authz.require(authz.watchlist_write(id))
    doomed = obj_or_404(Watchlist.by_id(id))

    # Read the terms and queue them before the watchlist itself is deleted.
    stale_terms = doomed.terms
    analyze_terms.delay(stale_terms)

    doomed.delete()
    db.session.commit()
    return jsonify({'status': 'ok'})
def crawl(self):
    """Load dashboard tickets into the 'Investigative Dashboard Requests' watchlist.

    Both the ``all_closed`` and ``all_open`` ticket listings are fetched
    from ``self.host``. Every ticket whose ``ticket_type`` maps to a
    category in ``REQUEST_TYPES`` is upserted as an entity of the
    watchlist; entities not seen in this run are removed afterwards.
    The symmetric difference between the terms before and after the run
    is passed to ``self.emit_watchlist``.
    """
    # The closed-tickets URL doubles as the watchlist's foreign id.
    closed_url = urljoin(self.host, '/ticket/all_closed/?format=json')
    watchlist = Watchlist.by_foreign_id(closed_url, {
        'label': 'Investigative Dashboard Requests'
    })
    # presumably (read=True, write=False) -- confirm against grant_foreign
    Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                             True, False)

    kept_ids = []
    terms_before = watchlist.terms
    terms_after = set()
    db.session.flush()

    for endpoint in ('all_closed', 'all_open'):
        listing_url = urljoin(self.host,
                              '/ticket/%s/?format=json' % endpoint)
        listing = self.session.get(listing_url).json()
        # NOTE(review): when the response has no paginator.object_list
        # this is None and the loop below raises TypeError, which aborts
        # the crawl before any entity is deleted.
        tickets = listing.get('paginator', {}).get('object_list')
        for ticket in tickets:
            category = REQUEST_TYPES.get(ticket.get('ticket_type'))
            if category is None:
                # Ticket types without a mapped category are skipped.
                continue
            ticket_name = ticket.get('name')
            attributes = {
                'name': ticket_name,
                'category': category,
                'data': ticket,
                'selectors': [ticket_name]
            }
            ent = Entity.by_foreign_id(str(ticket.get('id')), watchlist,
                                       attributes)
            terms_after.update(ent.terms)
            kept_ids.append(ent.id)
            log.info(" # %s (%s)", ent.name, ent.category)

    # Drop every entity of the watchlist that this run did not touch.
    watchlist.delete_entities(spare=kept_ids)
    changed_terms = terms_before.symmetric_difference(terms_after)
    self.emit_watchlist(watchlist, changed_terms)
def crawl_collection(self, collection):
    """Mirror one remote collection into a watchlist.

    Collections with no ``subjects`` are ignored. Otherwise the
    collection's permissions are copied onto the watchlist, its entities
    are upserted (entities without a ``name`` are skipped), entities not
    seen in this run are removed, and the symmetric difference between
    the terms before and after the run is passed to
    ``self.emit_watchlist``.
    """
    if len(collection.get('subjects', [])) == 0:
        return

    collection_url = urljoin(self.URL,
                             '/api/collections/%s' % collection.get('id'))
    watchlist = Watchlist.by_foreign_id(collection_url, {
        'label': collection.get('title')
    })

    perms_response = requests.get('%s/permissions' % collection_url,
                                  headers=self.HEADERS)
    for perm in perms_response.json().get('results', []):
        Permission.grant_foreign(watchlist, perm.get('role'),
                                 perm.get('read'), perm.get('write'))
    log.info(" > Spindle collection: %s", watchlist.label)

    entities_response = requests.get('%s/entities' % collection_url,
                                     headers=self.HEADERS)
    terms_before = watchlist.terms
    terms_after = set()
    kept_ids = []

    for record in entities_response.json().get('results', []):
        if record.get('name') is None:
            continue
        aliases = []
        for other_name in record.get('other_names', []):
            aliases.append(other_name.get('alias'))
        attributes = {
            'name': record.get('name'),
            'category': SCHEMATA.get(record.get('$schema'), OTHER),
            'data': record,
            'selectors': aliases
        }
        ent = Entity.by_foreign_id(record.get('id'), watchlist, attributes)
        terms_after.update(ent.terms)
        kept_ids.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)

    # Drop every entity of the watchlist that this run did not touch.
    watchlist.delete_entities(spare=kept_ids)
    changed_terms = terms_before.symmetric_difference(terms_after)
    self.emit_watchlist(watchlist, changed_terms)
def matchers(self):
    """Return the compiled matchers for the current watchlists.

    ``self.watchlists`` acts as a cache keyed by the entries yielded by
    ``Watchlist.timestamps()``. Cached entries whose key is no longer
    reported are evicted, and every reported key that is not cached yet
    is compiled with ``self.compile_watchlist(ts[0])``.

    Returns:
        The values of ``self.watchlists`` after the refresh.
    """
    timestamps = Watchlist.timestamps()

    # Iterate over a snapshot of the keys: removing entries while
    # iterating the live dict raises RuntimeError on Python 3.
    for ts in list(self.watchlists):
        if ts not in timestamps:
            self.watchlists.pop(ts, None)

    for ts in timestamps:
        if ts not in self.watchlists:
            log.info('Entity tagger updating watchlist: %r', ts)
            # presumably ts[0] is the watchlist id -- confirm against
            # Watchlist.timestamps()
            self.watchlists[ts] = self.compile_watchlist(ts[0])
    return self.watchlists.values()
def crawl_source(self, source):
    """Mirror one OpenNames source into a watchlist.

    Sources whose ``source_id`` is listed in ``IGNORE_SOURCES`` are
    skipped. Otherwise the source's JSON dump is downloaded, its
    entities are upserted (entities without a ``name`` are skipped),
    entities not seen in this run are removed, and the symmetric
    difference between the terms before and after the run is passed to
    ``self.emit_watchlist``.
    """
    source_id = source.get('source_id')
    if source_id in IGNORE_SOURCES:
        return

    dump_name = source.get('data', {}).get('json')
    dump_url = urljoin(JSON_PATH, dump_name)
    watchlist = Watchlist.by_foreign_id(dump_url, {
        'label': source.get('source_id')
    })
    # presumably (read=True, write=False) -- confirm against grant_foreign
    Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
    log.info(" > OpenNames collection: %s", watchlist.label)

    terms_before = watchlist.terms
    terms_after = set()
    kept_ids = []
    db.session.flush()

    records = requests.get(dump_url).json().get('entities', [])
    for record in records:
        if record.get('name') is None:
            continue

        # Every other_name entry contributes a selector; identities only
        # contribute when they carry a truthy number.
        selectors = [other.get('other_name')
                     for other in record.get('other_names', [])]
        selectors.extend(
            identity.get('number')
            for identity in record.get('identities', [])
            if identity.get('number')
        )

        attributes = {
            'name': record.get('name'),
            'category': CATEGORIES.get(record.get('type'), OTHER),
            'data': record,
            'selectors': selectors
        }
        ent = Entity.by_foreign_id(record.get('uid'), watchlist, attributes)
        terms_after.update(ent.terms)
        kept_ids.append(ent.id)
        log.info(" # %s (%s)", ent.name, ent.category)

    # Drop every entity of the watchlist that this run did not touch.
    watchlist.delete_entities(spare=kept_ids)
    changed_terms = terms_before.symmetric_difference(terms_after)
    self.emit_watchlist(watchlist, changed_terms)
def view(id):
    """Return a single watchlist as JSON.

    Read access is checked through ``authz`` before the watchlist is
    looked up by ``id``; the lookup result is passed through
    ``obj_or_404`` and to ``etag_cache_keygen`` before serialisation.
    """
    authz.require(authz.watchlist_read(id))
    found = obj_or_404(Watchlist.by_id(id))
    etag_cache_keygen(found)
    return jsonify(found)
def create():
    """Create a watchlist from the request payload and return its view.

    Requires ``authz.logged_in()``. The watchlist is created with the
    request data and ``request.auth_role``, committed, and then rendered
    through ``view``.
    """
    authz.require(authz.logged_in())

    payload = request_data()
    role = request.auth_role
    created = Watchlist.create(payload, role)

    db.session.commit()
    return view(created.id)
def index():
    """Return a paged JSON listing of watchlists ordered by label.

    The query is restricted to the ids returned by
    ``authz.watchlists(authz.READ)``.
    """
    readable_ids = authz.watchlists(authz.READ)
    query = Watchlist.all(watchlist_ids=readable_ids).order_by(
        Watchlist.label.asc()
    )
    page = Pager(query).to_dict()
    return jsonify(page)