Пример #1
0
def update(id):
    """Apply the request payload to a watchlist and return its view.

    Write access to the watchlist is required via ``authz``. The lookup
    result is passed through ``obj_or_404`` before being modified, and the
    response is produced by delegating to ``view`` after the commit.
    """
    authz.require(authz.watchlist_write(id))
    target = obj_or_404(Watchlist.by_id(id))
    payload = request_data()
    target.update(payload)
    db.session.add(target)
    db.session.commit()
    return view(id)
Пример #2
0
def delete(id):
    """Delete a watchlist and answer with a JSON ``{'status': 'ok'}`` body.

    Write access to the watchlist is required via ``authz``.
    """
    authz.require(authz.watchlist_write(id))
    doomed = obj_or_404(Watchlist.by_id(id))
    # The terms are read and handed to analyze_terms before delete() runs.
    # NOTE(review): `.delay` looks like an asynchronous task dispatch --
    # confirm against the definition of analyze_terms.
    analyze_terms.delay(doomed.terms)
    doomed.delete()
    db.session.commit()
    response = {'status': 'ok'}
    return jsonify(response)
Пример #3
0
 def crawl(self):
     """Sync the 'Investigative Dashboard Requests' watchlist from self.host.

     The ``all_closed`` and ``all_open`` ticket listings are fetched as
     JSON through ``self.session``. Every ticket whose ``ticket_type`` maps
     to a category in ``REQUEST_TYPES`` is handed to
     ``Entity.by_foreign_id``; tickets with an unmapped type are skipped.
     Afterwards ``delete_entities`` is called with the ids seen in this
     run as ``spare``, and the symmetric difference between the terms read
     at the start and the terms collected in this run is passed to
     ``self.emit_watchlist``.
     """
     # The all_closed listing URL doubles as the watchlist's foreign id.
     foreign_id = urljoin(self.host, '/ticket/all_closed/?format=json')
     watchlist = Watchlist.by_foreign_id(foreign_id, {
         'label': 'Investigative Dashboard Requests'
     })
     Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                              True, False)
     seen_ids = []
     terms_before = watchlist.terms
     terms_after = set()
     db.session.flush()
     for endpoint in ('all_closed', 'all_open'):
         listing = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
         payload = self.session.get(listing).json()
         # NOTE(review): a response without 'object_list' yields None here
         # and the loop below raises TypeError -- confirm that is intended.
         tickets = payload.get('paginator', {}).get('object_list')
         for ticket in tickets:
             category = REQUEST_TYPES.get(ticket.get('ticket_type'))
             if category is None:
                 continue
             title = ticket.get('name')
             attributes = {
                 'name': title,
                 'category': category,
                 'data': ticket,
                 'selectors': [title]
             }
             ent = Entity.by_foreign_id(str(ticket.get('id')), watchlist,
                                        attributes)
             terms_after.update(ent.terms)
             seen_ids.append(ent.id)
             log.info("  # %s (%s)", ent.name, ent.category)
     watchlist.delete_entities(spare=seen_ids)
     changed = terms_before.symmetric_difference(terms_after)
     self.emit_watchlist(watchlist, changed)
Пример #4
0
    def crawl_collection(self, collection):
        """Mirror one remote collection into a watchlist.

        Collections with an empty ``subjects`` list are ignored. Otherwise
        the collection URL (built from ``self.URL`` and the collection id)
        is used as the watchlist's foreign id, the remote ``/permissions``
        results are forwarded to ``Permission.grant_foreign``, and every
        named entity from ``/entities`` is handed to
        ``Entity.by_foreign_id``. ``delete_entities`` is then called with
        the ids seen in this run as ``spare``, and the symmetric difference
        of the terms before and after is passed to ``self.emit_watchlist``.

        :param collection: mapping read via ``.get`` for ``subjects``,
            ``id`` and ``title``.
        """
        subjects = collection.get('subjects', [])
        if len(subjects) == 0:
            return

        base = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        watchlist = Watchlist.by_foreign_id(base, {
            'label': collection.get('title')
        })

        perm_response = requests.get('%s/permissions' % base,
                                     headers=self.HEADERS)
        for grant in perm_response.json().get('results', []):
            Permission.grant_foreign(watchlist, grant.get('role'),
                                     grant.get('read'), grant.get('write'))

        log.info(" > Spindle collection: %s", watchlist.label)
        entity_response = requests.get('%s/entities' % base,
                                       headers=self.HEADERS)
        terms_before = watchlist.terms
        terms_after = set()
        seen_ids = []
        for record in entity_response.json().get('results', []):
            if record.get('name') is None:
                # Entities without a name are skipped.
                continue
            aliases = []
            for other in record.get('other_names', []):
                aliases.append(other.get('alias'))
            attributes = {
                'name': record.get('name'),
                'category': SCHEMATA.get(record.get('$schema'), OTHER),
                'data': record,
                'selectors': aliases
            }
            ent = Entity.by_foreign_id(record.get('id'), watchlist,
                                       attributes)
            terms_after.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=seen_ids)
        changed = terms_before.symmetric_difference(terms_after)
        self.emit_watchlist(watchlist, changed)
Пример #5
0
    def matchers(self):
        """Return the cached matchers, refreshed against current timestamps.

        ``self.watchlists`` is a cache keyed by the entries returned from
        ``Watchlist.timestamps()``. Cache entries whose key is no longer
        reported are dropped, and keys that are not cached yet are compiled
        with ``self.compile_watchlist``.

        :return: ``self.watchlists.values()`` after the refresh.
        """
        timestamps = Watchlist.timestamps()

        # Iterate over a snapshot of the keys: popping from the dict while
        # looping over a live keys() view raises
        # "RuntimeError: dictionary changed size during iteration" on
        # Python 3. The snapshot behaves identically on Python 2.
        for ts in list(self.watchlists.keys()):
            if ts not in timestamps:
                self.watchlists.pop(ts, None)

        for ts in timestamps:
            if ts not in self.watchlists:
                log.info('Entity tagger updating watchlist: %r', ts)
                # NOTE(review): ts[0] is presumably the watchlist id part
                # of the timestamp entry -- confirm against
                # Watchlist.timestamps().
                self.watchlists[ts] = self.compile_watchlist(ts[0])

        return self.watchlists.values()
Пример #6
0
    def crawl_source(self, source):
        """Mirror one source's JSON entity dump into a watchlist.

        Sources whose ``source_id`` is listed in ``IGNORE_SOURCES`` are
        skipped. The dump URL (``JSON_PATH`` joined with the source's
        ``data.json`` value) serves as the watchlist's foreign id, and
        ``Role.SYSTEM_GUEST`` is passed to ``Permission.grant_foreign``.
        Every named entity in the dump is handed to
        ``Entity.by_foreign_id``; ``delete_entities`` is then called with
        the ids seen in this run as ``spare``, and the symmetric difference
        of the terms before and after is passed to ``self.emit_watchlist``.

        :param source: mapping read via ``.get`` for ``source_id`` and
            ``data``.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        dump_name = source.get('data', {}).get('json')
        dump_url = urljoin(JSON_PATH, dump_name)

        watchlist = Watchlist.by_foreign_id(dump_url, {
            'label': source.get('source_id')
        })
        Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", watchlist.label)
        terms_before = watchlist.terms
        terms_after = set()
        seen_ids = []
        db.session.flush()
        records = requests.get(dump_url).json().get('entities', [])
        for record in records:
            if record.get('name') is None:
                # Entities without a name are skipped.
                continue
            # Every other_name entry is used as a selector; identity
            # numbers are added only when they are truthy.
            selectors = [alt.get('other_name')
                         for alt in record.get('other_names', [])]
            selectors.extend(ident.get('number')
                             for ident in record.get('identities', [])
                             if ident.get('number'))
            attributes = {
                'name': record.get('name'),
                'category': CATEGORIES.get(record.get('type'), OTHER),
                'data': record,
                'selectors': selectors
            }
            ent = Entity.by_foreign_id(record.get('uid'), watchlist,
                                       attributes)
            terms_after.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=seen_ids)
        changed = terms_before.symmetric_difference(terms_after)
        self.emit_watchlist(watchlist, changed)
Пример #7
0
def view(id):
    """Return a single watchlist as JSON.

    Read access to the watchlist is required via ``authz``. The watchlist
    is passed to ``etag_cache_keygen`` before being serialised.
    """
    authz.require(authz.watchlist_read(id))
    record = obj_or_404(Watchlist.by_id(id))
    etag_cache_keygen(record)
    return jsonify(record)
Пример #8
0
def create():
    """Create a watchlist from the request payload and return its view.

    The caller must be logged in. The new watchlist is created with the
    request payload and ``request.auth_role``, committed, and then
    rendered by delegating to ``view``.
    """
    authz.require(authz.logged_in())
    payload = request_data()
    created = Watchlist.create(payload, request.auth_role)
    db.session.commit()
    return view(created.id)
Пример #9
0
def index():
    """Return a paged JSON listing of watchlists, ordered by label.

    The query is restricted to the watchlist ids that
    ``authz.watchlists(authz.READ)`` returns.
    """
    readable_ids = authz.watchlists(authz.READ)
    query = Watchlist.all(watchlist_ids=readable_ids)
    ordered = query.order_by(Watchlist.label.asc())
    page = Pager(ordered)
    return jsonify(page.to_dict())