Пример #1
0
    def crawl(self):
        """Load all Investigative Dashboard tickets into one collection.

        The ``all_closed`` and ``all_open`` ticket listings are fetched and
        every ticket is handed to ``self.update_entity``. Entities in the
        collection which were not returned by ``update_entity`` during this
        run are deleted, and the collection is then emitted together with
        the terms of the entities that were kept.
        """
        # The collection is keyed on the ``all_closed`` URL as its foreign ID.
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # IDs are only used for membership tests, so a set avoids rescanning
        # a list for every entity in the clean-up loop below.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            log.info(" > %s", url)
            data = self.session.get(url).json()
            # Every listing has to be processed: anything that is not
            # registered in ``existing_entities`` here is deleted below.
            paginator = data.get('paginator') or {}
            for req in paginator.get('object_list') or []:
                ent = self.update_entity(req, collection)
                if ent is not None:
                    terms.update(ent.terms)
                    existing_entities.add(ent.id)
                    log.info("  # %s", ent.name)

        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Пример #2
0
    def crawl_collection(self, collection):
        """Copy one remote Spindle collection and its entities.

        ``collection`` is the remote collection description (a mapping).
        Descriptions without any ``subjects`` are ignored. Permissions and
        named entities are fetched from the remote API, local entities which
        were not seen in the response are deleted, and the collection is
        emitted with the terms of the imported entities.
        """
        if len(collection.get('subjects', [])) == 0:
            return

        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        meta = {'label': collection.get('title')}
        # From here on ``collection`` refers to the local Collection object.
        collection = Collection.by_foreign_id(url, meta)

        perms_res = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in perms_res.json().get('results', []):
            role = perm.get('role')
            read = perm.get('read')
            write = perm.get('write')
            Permission.grant_foreign(collection, role, read, write)

        log.info(" > Spindle collection: %s", collection.label)
        entities_res = requests.get('%s/entities' % url, headers=self.HEADERS)
        terms = set()
        seen_ids = []
        for item in entities_res.json().get('results', []):
            # Entities without a name are not imported.
            if item.get('name') is None:
                continue
            aliases = []
            for other in item.get('other_names', []):
                aliases.append(other.get('alias'))
            ent = Entity.by_foreign_id(item.get('id'), collection, {
                'name': item.get('name'),
                'category': SCHEMATA.get(item.get('$schema'), OTHER),
                'data': item,
                'selectors': aliases
            })
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        # Remove local entities that the remote API no longer returned.
        for entity in collection.entities:
            if entity.id in seen_ids:
                continue
            entity.delete()
        self.emit_collection(collection, terms)
Пример #3
0
    def crawl(self):
        """Synchronise Investigative Dashboard tickets with a collection.

        Tickets from both the ``all_closed`` and the ``all_open`` listing
        are passed to ``self.update_entity``. Once both listings have been
        read, entities of the collection that ``update_entity`` did not
        return in this run are deleted and the collection is emitted with
        the terms gathered from the returned entities.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(
            url, {'label': 'Investigative Dashboard Requests'})
        Permission.grant_foreign(collection, 'idashboard:occrp_staff', True,
                                 False)
        # A set gives constant-time lookups in the deletion pass below.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            log.info(" > %s", url)
            data = self.session.get(url).json()

            # The tickets must actually be processed here; skipping this
            # loop would leave ``existing_entities`` empty and the deletion
            # pass would then remove every entity in the collection.
            paginator = data.get('paginator') or {}
            for req in paginator.get('object_list') or []:
                ent = self.update_entity(req, collection)
                if ent is not None:
                    terms.update(ent.terms)
                    existing_entities.add(ent.id)
                    log.info("  # %s", ent.name)

        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Пример #4
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Remove the contents of a collection and, optionally, the collection.

    Queued work, the aggregator, notifications and index entries are
    dropped first, then the database rows that belong to the collection.
    Unless ``keep_metadata`` is set, linkages, permissions, the collection
    row and its index entry are removed as well. ``sync`` is forwarded to
    the notification and index clean-up calls.
    """
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    try:
        aggregator.drop()
    finally:
        # Always release the aggregator, even if dropping failed.
        aggregator.close()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)

    # Re-use an existing deletion timestamp if the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()
    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        # Considering linkages metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    # Index and authorization clean-up happen only after the commit.
    if drop_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Пример #5
0
    def crawl_source(self, source):
        """Import the entities of one OpenNames source.

        ``source`` is the source description (a mapping). Sources whose
        ``source_id`` is listed in ``IGNORE_SOURCES`` are skipped. The
        source's JSON dump is downloaded, each entity is converted and
        passed to ``self.emit_entity``, and finally the collection itself
        is emitted.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))
        # Fall back to the source ID when no readable source name exists.
        source_name = source.get('source') or source.get('source_id')
        label = '%s - %s' % (source.get('publisher'), source_name)
        collection = self.find_collection(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)

        payload = requests.get(url).json()
        for entity in payload.get('entities', []):
            identifier = {
                'scheme': 'opennames:%s' % source.get('source_id'),
                'identifier': entity.get('uid')
            }
            data = {
                'identifiers': [identifier],
                'other_names': [],
                'name': entity.get('name'),
                '$schema': SCHEMA.get(entity.get('type'),
                                      '/entity/entity.json#')
            }
            renamed = data['other_names']
            for alias in entity.get('other_names', []):
                # The alias dict is changed in place: ``other_name`` is
                # moved to ``name`` (``None`` when it was missing).
                alias['name'] = alias.pop('other_name', None)
                renamed.append(alias)
            self.emit_entity(collection, data)
        self.emit_collection(collection)
Пример #6
0
 def load_fixtures(self):
     """Create one private and one public collection with sample data.

     Each collection gets a single entity and a read-only grant
     (read=True, write=False): ``Role.SYSTEM_USER`` on the private
     collection and ``Role.SYSTEM_GUEST`` on the public one. After the
     commit the ``samples.ijson`` fixture is indexed into the private
     collection and both collections are processed.
     """
     private_meta = {
         'foreign_id': 'test_private',
         'label': "Private Collection",
         'category': 'grey'
     }
     self.private_coll = Collection.create(private_meta)
     banana = {
         'schema': 'Person',
         'properties': {
             'name': ['Banana'],
         }
     }
     self._banana = Entity.create(banana, self.private_coll)
     Permission.grant(self.private_coll,
                      Role.by_foreign_id(Role.SYSTEM_USER), True, False)

     public_meta = {
         'foreign_id': 'test_public',
         'label': "Public Collection",
         'category': 'news'
     }
     self.public_coll = Collection.create(public_meta)
     kwazulu = {
         'schema': 'Company',
         'properties': {
             'name': ['KwaZulu'],
             'alias': ['kwazulu']
         }
     }
     self._kwazulu = Entity.create(kwazulu, self.public_coll)
     Permission.grant(self.public_coll,
                      Role.by_foreign_id(Role.SYSTEM_GUEST), True, False)
     db.session.commit()

     fixture_path = self.get_fixture_path('samples.ijson')
     index_entities(self.private_coll, read_entities(fixture_path))
     for coll in (self.public_coll, self.private_coll):
         process_collection(coll, ingest=False, reset=True)
Пример #7
0
    def crawl_source(self, source):
        """Fetch one OpenNames source and emit its entities.

        Nothing happens for sources whose ``source_id`` appears in
        ``IGNORE_SOURCES``. Otherwise a collection is looked up for the
        source's JSON URL, ``Role.SYSTEM_GUEST`` is granted read-only
        access, every entity in the JSON document is emitted and then the
        collection is emitted.
        """
        if source.get('source_id') in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        source_name = source.get('source') or source.get('source_id')
        publisher = source.get('publisher')
        collection = self.find_collection(
            url, {'label': '%s - %s' % (publisher, source_name)})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)

        for entity in requests.get(url).json().get('entities', []):
            data = {}
            data['identifiers'] = [{
                'scheme': 'opennames:%s' % source.get('source_id'),
                'identifier': entity.get('uid')
            }]
            data['other_names'] = []
            data['name'] = entity.get('name')
            # Unknown entity types fall back to the generic entity schema.
            data['$schema'] = SCHEMA.get(entity.get('type'),
                                         '/entity/entity.json#')
            for other in entity.get('other_names', []):
                # Rename the ``other_name`` key to ``name`` in place.
                other['name'] = other.pop('other_name', None)
                data['other_names'].append(other)
            self.emit_entity(collection, data)
        self.emit_collection(collection)
Пример #8
0
def update_permission(role, collection, read, write, editor_id=None):
    """Update a role's permission to access a given collection.

    The grant is committed and the role refreshed. If the grant produced
    no permission object, ``None`` is returned. Otherwise an event is
    published when there was no previous permission or the previous one
    did not include read access: ``PUBLISH_COLLECTION`` on the global
    channel for the guest role, ``GRANT_COLLECTION`` on the role's own
    channel for any other role. Returns the new permission.
    """
    previous = Permission.by_collection_role(collection, role)
    granted = Permission.grant(collection, role, read, write)
    db.session.commit()
    refresh_role(role)
    if granted is None:
        return None

    already_readable = previous is not None and previous.read
    if not already_readable:
        if role.foreign_id == Role.SYSTEM_GUEST:
            event, channels = Events.PUBLISH_COLLECTION, [GLOBAL]
        else:
            event, channels = Events.GRANT_COLLECTION, [role]
        publish(
            event,
            actor_id=editor_id,
            params={"role": role, "collection": collection},
            channels=channels,
        )
    return granted
Пример #9
0
def delete_collection(collection_id, wait=False):
    """Delete the collection with the given ID and its dependent objects.

    Deleting a collection affects many associated objects and requires
    checks, so this is done manually and in detail here. An unknown ID is
    logged as an error and otherwise ignored. ``wait`` is forwarded to the
    index, document and entity deletion helpers.
    """
    collection = (db.session.query(Collection)
                  .filter(Collection.id == collection_id)
                  .first())
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)
    # One timestamp is shared by the permission and collection deletes.
    deleted_at = datetime.utcnow()
    index_delete(collection_id, wait=wait)

    log.info("Deleting cross-referencing matches...")
    Match.delete_by_collection(collection_id)

    log.info("Deleting permissions...")
    Permission.delete_by_collection(collection_id, deleted_at=deleted_at)

    for remove in (delete_documents, delete_entities):
        remove(collection_id, wait=wait)

    collection.delete(deleted_at=deleted_at)
    db.session.commit()
Пример #10
0
 def crawl(self):
     """Load Investigative Dashboard tickets into a watchlist.

     Tickets from the ``all_closed`` and ``all_open`` listings whose
     ``ticket_type`` maps to a category in ``REQUEST_TYPES`` become
     entities of the watchlist. Entities that were not seen are deleted,
     and the watchlist is emitted with the symmetric difference between
     its previous terms and the terms of the imported entities.
     """
     url = urljoin(self.host, '/ticket/all_closed/?format=json')
     watchlist = Watchlist.by_foreign_id(
         url, {'label': 'Investigative Dashboard Requests'})
     Permission.grant_foreign(watchlist, 'idashboard:occrp_staff',
                              True, False)
     kept_ids = []
     # Terms are read before any entity is changed, to diff against later.
     previous_terms = watchlist.terms
     updated_terms = set()
     db.session.flush()
     for endpoint in ('all_closed', 'all_open'):
         url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
         listing = self.session.get(url).json()
         for req in listing.get('paginator', {}).get('object_list'):
             category = REQUEST_TYPES.get(req.get('ticket_type'))
             # Tickets of an unmapped type are not imported.
             if category is not None:
                 ent = Entity.by_foreign_id(str(req.get('id')), watchlist, {
                     'name': req.get('name'),
                     'category': category,
                     'data': req,
                     'selectors': [req.get('name')]
                 })
                 updated_terms.update(ent.terms)
                 kept_ids.append(ent.id)
                 log.info("  # %s (%s)", ent.name, ent.category)
     watchlist.delete_entities(spare=kept_ids)
     changed_terms = previous_terms.symmetric_difference(updated_terms)
     self.emit_watchlist(watchlist, changed_terms)
Пример #11
0
    def crawl_collection(self, collection):
        """Copy one remote Spindle collection into a watchlist.

        Collection descriptions without ``subjects`` are ignored. The
        remote permissions are granted on the watchlist, every named remote
        entity is stored, unseen entities are deleted, and the watchlist is
        emitted with the symmetric difference between its previous terms
        and the terms of the imported entities.
        """
        if len(collection.get('subjects', [])) == 0:
            return

        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        watchlist = Watchlist.by_foreign_id(
            url, {'label': collection.get('title')})

        perms_res = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in perms_res.json().get('results', []):
            role = perm.get('role')
            read = perm.get('read')
            write = perm.get('write')
            Permission.grant_foreign(watchlist, role, read, write)

        log.info(" > Spindle collection: %s", watchlist.label)
        entities_res = requests.get('%s/entities' % url, headers=self.HEADERS)
        # Terms are captured before the entities are updated.
        previous_terms = watchlist.terms
        updated_terms = set()
        kept_ids = []
        for item in entities_res.json().get('results', []):
            if item.get('name') is not None:
                aliases = []
                for other in item.get('other_names', []):
                    aliases.append(other.get('alias'))
                ent = Entity.by_foreign_id(item.get('id'), watchlist, {
                    'name': item.get('name'),
                    'category': SCHEMATA.get(item.get('$schema'), OTHER),
                    'data': item,
                    'selectors': aliases
                })
                updated_terms.update(ent.terms)
                kept_ids.append(ent.id)
                log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=kept_ids)
        changed_terms = previous_terms.symmetric_difference(updated_terms)
        self.emit_watchlist(watchlist, changed_terms)
Пример #12
0
    def test_update_collections_via_doc_update(self):
        """Check collection changes made by posting a document update.

        Three updates are posted to document 1000: adding a collection
        created by the logged-in user (must be accepted), replacing the
        list with a collection created without that user (must be ignored,
        collection 1000 stays), and an invalid collection ID (must be
        rejected with status 400).
        """
        url = '/api/1/documents/1000'
        ores = self.client.get(url)
        user = self.login()
        Permission.grant_collection(1000, user, True, True)

        can_write = Collection.create({'label': "Write"}, user)
        no_write = Collection.create({'label': "No-write"})
        db.session.commit()

        def post_json(payload):
            # Send ``payload`` as a JSON body to the document endpoint.
            return self.client.post(url,
                                    data=json.dumps(payload),
                                    content_type='application/json')

        # Case 1: add a collection created with the logged-in user.
        data = ores.json.copy()
        data['collection_id'].append(can_write.id)
        res = post_json(data)
        assert res.status_code == 200, res
        assert can_write.id in res.json['collection_id'], res.json

        # Case 2: a collection created without the user is not applied.
        data = ores.json.copy()
        data['collection_id'] = [no_write.id]
        res = post_json(data)
        assert res.status_code == 200, res
        assert no_write.id not in res.json['collection_id'], res.json
        assert 1000 in res.json['collection_id'], res.json

        # Case 3: a non-numeric collection ID is a bad request.
        data = ores.json.copy()
        data['collection_id'] = ['foo']
        res = post_json(data)
        assert res.status_code == 400, res
Пример #13
0
    def crawl(self):
        """Load Investigative Dashboard tickets into one collection.

        Tickets from the ``all_closed`` and ``all_open`` listings whose
        ``ticket_type`` maps to a category in ``REQUEST_TYPES`` are stored
        as entities. Entities of the collection that were not seen in this
        run are deleted, and the collection is emitted together with the
        terms of the imported entities.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        collection = Collection.by_foreign_id(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(collection, 'idashboard:occrp_staff',
                                 True, False)
        # Only membership is tested on the IDs, so use a set: a list made
        # the deletion pass below quadratic in the number of entities.
        existing_entities = set()
        terms = set()
        db.session.flush()
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()
            # A missing or null ``paginator``/``object_list`` is treated as
            # an empty listing instead of raising a TypeError.
            paginator = data.get('paginator') or {}
            for req in paginator.get('object_list') or []:
                category = REQUEST_TYPES.get(req.get('ticket_type'))
                if category is None:
                    # Tickets of an unmapped type are not imported.
                    continue
                ent = Entity.by_foreign_id(str(req.get('id')), collection, {
                    'name': req.get('name'),
                    'category': category,
                    'data': req,
                    'selectors': [req.get('name')]
                })
                terms.update(ent.terms)
                existing_entities.add(ent.id)
                log.info("  # %s (%s)", ent.name, ent.category)

        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Пример #14
0
    def test_update_collections_via_doc_update(self):
        """Posting a document update must respect collection access.

        Adding a collection created with the logged-in user succeeds; a
        collection created without that user is dropped from the result
        while collection 1000 is kept; a non-numeric ID yields status 400.
        """
        url = '/api/1/documents/1000'
        ores = self.client.get(url)
        user = self.login()
        Permission.grant_collection(1000, user, True, True)

        can_write = Collection.create({'label': "Write"}, user)
        no_write = Collection.create({'label': "No-write"})
        db.session.commit()

        post_options = {'content_type': 'application/json'}

        # Appending a collection created with the user is accepted.
        payload = ores.json.copy()
        payload['collection_id'].append(can_write.id)
        res = self.client.post(url, data=json.dumps(payload), **post_options)
        assert res.status_code == 200, res
        assert can_write.id in res.json['collection_id'], res.json

        # A collection created without the user is silently not applied.
        payload = ores.json.copy()
        payload['collection_id'] = [no_write.id]
        res = self.client.post(url, data=json.dumps(payload), **post_options)
        assert res.status_code == 200, res
        assert no_write.id not in res.json['collection_id'], res.json
        assert 1000 in res.json['collection_id'], res.json

        # Garbage instead of an ID is rejected.
        payload = ores.json.copy()
        payload['collection_id'] = ['foo']
        res = self.client.post(url, data=json.dumps(payload), **post_options)
        assert res.status_code == 400, res
Пример #15
0
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    ``config`` maps collection foreign IDs to their settings. For each
    entry the collection is looked up (and created as a managed collection
    when missing), the listed roles get read-only access, and every
    configured query is loaded. Rows in the source data are mapped to
    entities and links which can be understood by the entity index.
    """
    for foreign_id, data in config.items():
        collection = Collection.by_foreign_id(foreign_id)
        if collection is None:
            settings = {
                'foreign_id': foreign_id,
                'managed': True,
                # The foreign ID doubles as label when none is configured.
                'label': data.get('label') or foreign_id,
                'summary': data.get('summary'),
                'category': data.get('category'),
            }
            collection = Collection.create(settings)

        for role_fk in dict_list(data, 'roles', 'role'):
            role = Role.by_foreign_id(role_fk)
            if role is None:
                log.warning("Could not find role: %s", role_fk)
                continue
            Permission.grant(collection, role, True, False)

        db.session.commit()
        update_collection(collection)

        for query in dict_list(data, 'queries', 'query'):
            load_query(collection, query)
Пример #16
0
    def load_fixtures(self):
        """Create an admin, two collections and their sample entities.

        The private collection holds three ``Banana`` persons with
        different birth dates and is readable by ``Role.SYSTEM_USER``; the
        public collection holds one company and is readable by
        ``Role.SYSTEM_GUEST``. After the commit both aggregators are
        cleared, the ``samples.ijson`` fixture is written to the private
        aggregator, and both collections are reindexed synchronously.
        """
        self.admin = self.create_user(foreign_id='admin', is_admin=True)
        self.private_coll = self.create_collection(
            foreign_id='test_private',
            label="Private Collection",
            category='grey',
            creator=self.admin
        )

        def make_banana(birth_date):
            # Same person name each time; only the birth date differs.
            return self.create_entity({
                'schema': 'Person',
                'properties': {
                    'name': ['Banana'],
                    'birthDate': birth_date
                }
            }, self.private_coll)

        self._banana = make_banana('1970-08-21')
        self._banana2 = make_banana('1970-03-21')
        self._banana3 = make_banana('1970-05-21')
        Permission.grant(self.private_coll,
                         Role.by_foreign_id(Role.SYSTEM_USER), True, False)

        self.public_coll = self.create_collection(
            foreign_id='test_public',
            label="Public Collection",
            category='news',
            creator=self.admin
        )
        kwazulu = {
            'schema': 'Company',
            'properties': {
                'name': ['KwaZulu'],
                'alias': ['kwazulu']
            }
        }
        self._kwazulu = self.create_entity(kwazulu, self.public_coll)
        Permission.grant(self.public_coll,
                         Role.by_foreign_id(Role.SYSTEM_GUEST), True, False)
        db.session.commit()

        # Public collection: empty its aggregator, then reindex.
        public_aggregator = get_aggregator(self.public_coll)
        public_aggregator.delete()
        public_aggregator.close()
        reindex_collection(self.public_coll, sync=True)

        # Private collection: replace aggregator content with the samples.
        private_aggregator = get_aggregator(self.private_coll)
        private_aggregator.delete()
        samples_path = self.get_fixture_path('samples.ijson')
        for sample in read_entities(samples_path):
            private_aggregator.put(sample, fragment='sample')
        private_aggregator.close()
        reindex_collection(self.private_coll, sync=True)
Пример #17
0
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model and commit the session.

    The models are processed in a fixed order: Alert, Permission, Entity,
    Collection, Role.
    """
    # Imported inside the function, as in the rest of this module's usage.
    from aleph.model import Alert, Collection, Entity, Permission, Role
    for model in (Alert, Permission, Entity, Collection, Role):
        model.cleanup_deleted()
    db.session.commit()
Пример #18
0
    def crawl_collection(self, collection):
        """Import one remote Spindle collection and its entities.

        ``collection`` is the remote collection description (a mapping).
        Descriptions without ``subjects`` are ignored. Remote permissions
        are granted on the local collection. Every named remote entity is
        normalised in place (schema mapped, relation fields dropped, date
        and alias fields rewritten, remote ID moved into ``identifiers``)
        and saved with ``merge=True``. Local entities that were not saved
        in this run are deleted and the collection is emitted with the
        collected terms.
        """
        # A missing, null or empty ``subjects`` value all mean "skip".
        if not collection.get('subjects'):
            return
        url = urljoin(self.URL, '/api/collections/%s' % collection.get('id'))
        collection = Collection.by_foreign_id(url, {
            'label': collection.get('title')
        })
        res = requests.get('%s/permissions' % url, headers=self.HEADERS)
        for perm in res.json().get('results', []):
            Permission.grant_foreign(collection, perm.get('role'),
                                     perm.get('read'), perm.get('write'))

        log.info(" > Spindle collection: %s", collection.label)
        res = requests.get('%s/entities' % url, headers=self.HEADERS)
        terms = set()
        # A set keeps the deletion pass below linear in the entity count.
        existing_entities = set()
        # Relation fields that are removed before the entity is saved.
        dropped_fields = ('members', 'memberships', 'assets', 'owners',
                          'family_first', 'family_second',
                          'social_first', 'social_second')
        for entity in res.json().get('results', []):
            if entity.get('name') is None:
                continue
            entity['$schema'] = SCHEMATA.get(entity.get('$schema'), OTHER)

            # JSON null values are left untouched instead of crashing.
            jurisdiction = entity.get('jurisdiction_code')
            if jurisdiction is not None:
                entity['jurisdiction_code'] = jurisdiction.lower()

            for field in dropped_fields:
                entity.pop(field, None)

            for date_field in ['birth_date']:
                value = entity.get(date_field)
                # Keep only the date part of a ``<date>T<time>`` value.
                if value is not None and 'T' in value:
                    entity[date_field] = value.split('T', 1)[0]

            for on in entity.get('other_names') or []:
                # Rename ``alias`` to ``name`` when an alias is present.
                name = on.pop('alias', None)
                if name is not None:
                    on['name'] = name

            entity['identifiers'] = [{
                'scheme': 'spindle',
                'identifier': entity.pop('id', None)
            }]
            ent = Entity.save(entity, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            existing_entities.add(ent.id)
            log.info("  # %s", ent.name)

        for entity in collection.entities:
            if entity.id not in existing_entities:
                entity.delete()
        self.emit_collection(collection, terms)
Пример #19
0
def delete_collection(collection, sync=False):
    """Delete a collection together with its entities and permissions.

    The collection is reset and its notifications flushed before the
    database rows are removed and the change is committed. ``sync`` is only
    forwarded to the index deletion; the reset always runs with
    ``sync=False``.
    """
    reset_collection(collection, sync=False)
    flush_notifications(collection)

    # Keep an existing deletion timestamp, otherwise stamp with "now".
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()

    index.delete_collection(collection.id, sync=sync)
    Authz.flush()
Пример #20
0
def update_permission(role, collection, read, write):
    """Update a role's permission to access a given collection.

    The grant is committed, then the role is notified with the
    ``email/permission.html`` template, which receives the permission
    before and after the change. Returns the new permission.
    """
    before = Permission.by_collection_role(collection, role)
    after = Permission.grant(collection, role, read, write)
    db.session.commit()

    subject = collection.label
    link = collection_url(collection.id)
    notify_role_template(role, subject, 'email/permission.html',
                         url=link,
                         pre=before,
                         post=after,
                         collection=collection)
    return after
Пример #21
0
def delete_collection(collection, sync=False):
    """Delete a collection with its entities, matches and permissions.

    Notifications are flushed and the aggregator dropped first. After the
    database changes are committed, the collection and its entities are
    removed from the index, the collection is refreshed and ``Authz`` is
    flushed. ``sync`` only applies to the collection index deletion; the
    entity index deletion always runs with ``sync=False``.
    """
    flush_notifications(collection)
    drop_aggregator(collection)

    # Re-use an earlier deletion timestamp when the collection has one.
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()

    index.delete_collection(collection.id, sync=sync)
    index.delete_entities(collection.id, sync=False)
    refresh_collection(collection.id)
    Authz.flush()
Пример #22
0
def cleanup_deleted():
    """Run ``cleanup_deleted`` on each model and commit the session.

    The models are processed in a fixed order: Mapping, Diagram, Document,
    Alert, Permission, Entity, Collection, Role.
    """
    from aleph.model import Alert, Collection, Entity
    from aleph.model import Diagram, Document, Mapping
    from aleph.model import Permission, Role
    models = (Mapping, Diagram, Document, Alert,
              Permission, Entity, Collection, Role)
    for model in models:
        model.cleanup_deleted()
    db.session.commit()
Пример #23
0
    def crawl(self):
        """Pass every Investigative Dashboard ticket to ``update_entity``.

        Both the ``all_closed`` and the ``all_open`` listing are read; the
        collection (keyed on the ``all_closed`` URL) is emitted once all
        tickets have been handled.
        """
        closed_url = urljoin(self.host, '/ticket/all_closed/?format=json')
        coll = self.find_collection(
            closed_url, {'label': 'Investigative Dashboard Requests'})
        Permission.grant_foreign(coll, 'idashboard:occrp_staff', True, False)

        for endpoint in ('all_closed', 'all_open'):
            listing_url = urljoin(self.host,
                                  '/ticket/%s/?format=json' % endpoint)
            listing = self.session.get(listing_url).json()
            tickets = listing.get('paginator', {}).get('object_list')
            for req in tickets:
                # TODO: get the ID API fixed.
                self.update_entity(req, coll)

        self.emit_collection(coll)
Пример #24
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete a collection's contents and, optionally, the collection.

    Entities, mappings, diagrams and documents are always removed. Unless
    ``keep_metadata`` is set, the permissions, the collection row and its
    index entry are removed too. ``sync`` is only forwarded to the index
    deletion; the reset always runs with ``sync=False``.
    """
    reset_collection(collection, sync=False)

    # Keep an existing deletion timestamp, otherwise stamp with "now".
    deleted_at = collection.deleted_at
    if not deleted_at:
        deleted_at = datetime.utcnow()

    for model in (Entity, Mapping, Diagram):
        model.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)

    drop_metadata = not keep_metadata
    if drop_metadata:
        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()

    # The index is only touched once the database change is committed.
    if drop_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Пример #25
0
    def load_fixtures(self):
        """Create an admin, two collections and their sample entities.

        The private collection gets a person entity and a read-only grant
        for ``Role.SYSTEM_USER``; the public one gets a company entity and
        a read-only grant for ``Role.SYSTEM_GUEST``. After the commit the
        public collection is processed, the ``samples.ijson`` fixture is
        written to the private aggregator and indexed entity by entity,
        and the private collection is processed as well.
        """
        self.admin = self.create_user(foreign_id='admin', is_admin=True)

        private_settings = {
            'foreign_id': 'test_private',
            'label': "Private Collection",
            'category': 'grey',
            'casefile': False,
            'creator': self.admin,
        }
        self.private_coll = self.create_collection(**private_settings)
        banana = {
            'schema': 'Person',
            'properties': {
                'name': ['Banana'],
            }
        }
        self._banana = Entity.create(banana, self.private_coll)
        Permission.grant(self.private_coll,
                         Role.by_foreign_id(Role.SYSTEM_USER), True, False)

        public_settings = {
            'foreign_id': 'test_public',
            'label': "Public Collection",
            'category': 'news',
            'casefile': False,
            'creator': self.admin,
        }
        self.public_coll = self.create_collection(**public_settings)
        kwazulu = {
            'schema': 'Company',
            'properties': {
                'name': ['KwaZulu'],
                'alias': ['kwazulu']
            }
        }
        self._kwazulu = Entity.create(kwazulu, self.public_coll)
        Permission.grant(self.public_coll,
                         Role.by_foreign_id(Role.SYSTEM_GUEST), True, False)
        db.session.commit()

        # Public collection: drop the aggregator and process it.
        drop_aggregator(self.public_coll)
        public_stage = get_stage(self.public_coll, OP_PROCESS)
        process_collection(public_stage, self.public_coll,
                           ingest=False, sync=True)

        # Private collection: load the samples, indexing each one.
        aggregator = get_aggregator(self.private_coll)
        aggregator.delete()
        private_stage = get_stage(self.private_coll, OP_PROCESS)
        samples_path = self.get_fixture_path('samples.ijson')
        for sample in read_entities(samples_path):
            aggregator.put(sample, fragment='sample')
            index_aggregate(private_stage,
                            self.private_coll,
                            entity_id=sample.id,
                            sync=True)
        aggregator.close()
        process_collection(private_stage, self.private_coll,
                           ingest=False, sync=True)
Пример #26
0
def permissions_index(id):
    """List the permissions of a collection, padded with unused groups.

    Requires write access to the collection. Stored permissions whose role
    is visible to the requester are returned as-is; every visible group
    role without a stored permission is added as a placeholder entry with
    ``read`` and ``write`` set to ``False``.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    q = Permission.all().filter(Permission.collection_id == collection.id)
    permissions = []

    # Visible groups; those with a stored permission are removed below.
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, request.authz):
            unassigned.append(group)

    for permission in q.all():
        role = permission.role
        if check_visible(role, request.authz):
            permissions.append(permission)
            if role in unassigned:
                unassigned.remove(role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    permissions.extend({
        'collection_id': collection.id,
        'write': False,
        'read': False,
        'role': role
    } for role in unassigned)

    total = len(permissions)
    results = PermissionSchema().dump(permissions, many=True)
    return jsonify({'total': total, 'results': results})
Пример #27
0
def index(id):
    """List the permissions of a collection, padded with unused roles.

    Requires write access to the collection. Candidate roles are the
    groups returned for the requester, plus the system roles for admins.
    Stored permissions with a visible role are returned; the remaining
    candidate roles are added as placeholders with ``read`` and ``write``
    set to ``False``, except public roles on casefile collections.
    """
    collection = get_db_collection(id, request.authz.WRITE)
    unassigned = Role.all_groups(request.authz).all()
    if request.authz.is_admin:
        unassigned.extend(Role.all_system())

    q = Permission.all().filter(Permission.collection_id == collection.id)
    permissions = []
    for permission in q.all():
        role = permission.role
        if check_visible(role, request.authz):
            permissions.append(permission)
            if role in unassigned:
                unassigned.remove(role)

    # this workaround ensures that all groups are visible for the user to
    # select in the UI even if they are not currently associated with the
    # collection.
    for role in unassigned:
        # Public roles are not offered for casefile collections.
        if not collection.casefile or not role.is_public:
            permissions.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(role.id)
            })

    permissions = PermissionSerializer().serialize_many(permissions)
    total = len(permissions)
    return jsonify({'total': total, 'results': permissions})
Пример #28
0
def permissions_index(collection):
    """List a collection's visible permissions plus unassigned groups.

    ``request.authz.require`` is called with the result of
    ``collection_write(collection)`` first. Stored permissions whose role
    passes ``check_visible`` are listed; afterwards each visible group from
    ``Role.all_groups()`` that is not listed yet is appended as a placeholder
    with ``read`` and ``write`` set to ``False``.
    """
    authz = request.authz
    authz.require(authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    results = []
    listed_role_ids = set()
    for granted in query.all():
        if not check_visible(granted.role):
            continue
        results.append(granted)
        listed_role_ids.add(granted.role.id)

    # Placeholders make every visible group selectable in the UI even when
    # it is not currently associated with the collection.
    for group in Role.all_groups():
        if not check_visible(group) or group.id in listed_role_ids:
            continue
        listed_role_ids.add(group.id)
        results.append({
            'write': False,
            'read': False,
            'role': group,
            'role_id': group.id
        })

    return jsonify({'total': len(results), 'results': results})
Пример #29
0
def index(id):
    """Return the audited permission listing for one collection as JSON.

    The collection is resolved through ``get_db_collection`` using the
    ``WRITE`` action and the access is recorded with ``record_audit``.
    Stored permissions whose role passes ``check_visible`` come first;
    visible groups without a stored permission follow as placeholders with
    ``read`` and ``write`` set to ``False``.
    """
    authz = request.authz
    collection = get_db_collection(id, authz.WRITE)
    record_audit(Audit.ACT_COLLECTION, id=id)

    # Groups the requester may see; entries are removed again as soon as a
    # stored permission for them turns up.
    unassigned = []
    for group in Role.all_groups():
        if check_visible(group, authz):
            unassigned.append(group)

    stored = Permission.all().filter(
        Permission.collection_id == collection.id)
    results = []
    for granted in stored.all():
        if check_visible(granted.role, authz):
            results.append(granted)
            if granted.role in unassigned:
                unassigned.remove(granted.role)

    # Placeholders keep the remaining groups selectable in the UI even
    # though they are not associated with the collection yet. Public roles
    # are left out when the collection is a casefile.
    for role in unassigned:
        if not (collection.casefile and role.is_public):
            results.append({
                'collection_id': collection.id,
                'write': False,
                'read': False,
                'role_id': str(role.id)
            })

    serialized = PermissionSerializer().serialize_many(results)
    return jsonify({
        'total': len(serialized),
        'results': serialized
    })
Пример #30
0
    def crawl(self):
        """Crawl the closed and open ticket endpoints into one collection.

        The collection is looked up via ``self.find_collection`` keyed by the
        ``all_closed`` endpoint URL, and ``idashboard:occrp_staff`` is
        granted read (but not write) access. Every request object found
        under ``paginator.object_list`` of each endpoint's JSON response is
        passed to ``self.update_entity``; finally the collection is emitted.

        A response without ``paginator`` or ``object_list`` (or with either
        set to null) is treated as an empty page instead of raising.
        """
        url = urljoin(self.host, '/ticket/all_closed/?format=json')
        coll = self.find_collection(url, {
            'label': 'Investigative Dashboard Requests'
        })
        Permission.grant_foreign(coll, 'idashboard:occrp_staff',
                                 True, False)
        for endpoint in ['all_closed', 'all_open']:
            url = urljoin(self.host, '/ticket/%s/?format=json' % endpoint)
            data = self.session.get(url).json()

            # ``or`` also covers keys that are present but null, which a
            # plain ``.get(key, default)`` would let through as None.
            paginator = data.get('paginator') or {}
            for req in paginator.get('object_list') or []:
                # TODO: get the ID API fixed.
                self.update_entity(req, coll)

        self.emit_collection(coll)
Пример #31
0
def permissions_index(collection):
    """Return the permissions stored for ``collection`` as JSON.

    ``authz.require`` is called with ``authz.collection_write(collection)``
    first. The response carries the row count as ``total`` and the query
    itself as ``results``.
    """
    authz.require(authz.collection_write(collection))
    matching = Permission.all().filter(
        Permission.collection_id == collection)
    total = matching.count()
    return jsonify({'total': total, 'results': matching})
Пример #32
0
def source_permissions_index(source=None):
    """Return the permissions stored for the source ``source`` as JSON.

    ``authz.require`` is called with ``authz.source_write(source)`` first.
    Only permissions whose resource type is ``Permission.SOURCE`` and whose
    resource ID equals ``source`` are selected.
    """
    authz.require(authz.source_write(source))
    matching = (Permission.all()
                .filter(Permission.resource_type == Permission.SOURCE)
                .filter(Permission.resource_id == source))
    total = matching.count()
    return jsonify({'total': total, 'results': matching})
Пример #33
0
def collection_permissions_index(collection=None):
    """Return the permissions stored for ``collection`` as JSON.

    ``authz.require`` is called with ``authz.collection_write(collection)``
    first. Only permissions whose resource type is ``Permission.COLLECTION``
    and whose resource ID equals ``collection`` are selected.
    """
    authz.require(authz.collection_write(collection))
    matching = (Permission.all()
                .filter(Permission.resource_type == Permission.COLLECTION)
                .filter(Permission.resource_id == collection))
    total = matching.count()
    return jsonify({'total': total, 'results': matching})
Пример #34
0
def permissions_index(collection):
    """Return the visible permissions stored for ``collection`` as JSON.

    ``request.authz.require`` is called with the result of
    ``collection_write(collection)`` first. Permissions whose role does not
    pass ``check_visible`` are left out of the listing.
    """
    authz = request.authz
    authz.require(authz.collection_write(collection))
    query = Permission.all().filter(Permission.collection_id == collection)
    visible = [perm for perm in query.all() if check_visible(perm.role)]
    return jsonify({'total': len(visible), 'results': visible})
Пример #35
0
    def crawl_source(self, source):
        """Load one OpenNames source into a collection of entities.

        Sources listed in ``IGNORE_SOURCES`` are skipped. The collection is
        keyed by the URL of the source's JSON file and ``Role.SYSTEM_GUEST``
        is granted read (but not write) access. Each downloaded entity is
        saved with ``merge=True``; entities of the collection that were not
        part of this run are deleted before the collection is emitted.
        """
        source_id = source.get('source_id')
        if source_id in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))
        name = source.get('source') or source_id
        label = '%s - %s' % (source.get('publisher'), name)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        kept_ids = []
        db.session.flush()
        scheme = 'opennames:%s' % source_id
        response = requests.get(url)
        for record in response.json().get('entities', []):
            payload = {
                'identifiers': [
                    {'scheme': scheme, 'identifier': record.get('uid')}
                ],
                'other_names': [],
                'name': record.get('name'),
                '$schema': SCHEMA.get(record.get('type'),
                                      '/entity/entity.json#')
            }
            # The feed stores the alias under 'other_name'; it is moved to
            # the 'name' key on the very same dict before being attached.
            for alias in record.get('other_names', []):
                alias['name'] = alias.pop('other_name', None)
                payload['other_names'].append(alias)

            saved = Entity.save(payload, collection_id=collection.id,
                                merge=True)
            db.session.flush()
            terms.update(saved.terms)
            kept_ids.append(saved.id)
            log.info("  # %s", saved.name)

        # Anything in the collection that this run did not save is stale.
        for existing in collection.entities:
            if existing.id in kept_ids:
                continue
            existing.delete()

        self.emit_collection(collection, terms)
Пример #36
0
def delete_collection_content(collection_id):
    """Delete a collection together with the objects attached to it.

    Logs an error and returns when no collection has the given ID. Otherwise
    entities, matches and permissions are deleted by collection, the index
    entries for the collection and its entities are removed, the collection
    itself is deleted and the session is committed.
    """
    # Deleting a collection touches many associated objects, so each step is
    # spelled out here rather than left to a cascade.
    collection = (db.session.query(Collection)
                  .filter(Collection.id == collection_id)
                  .first())
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)
    # Reuse an existing deletion timestamp so all rows share the same value.
    deleted_at = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection_id, deleted_at=deleted_at)
    index.delete_collection(collection_id)
    index.delete_entities(collection_id)
    collection.delete(deleted_at=deleted_at)
    db.session.commit()
Пример #37
0
def delete_collection(collection, keep_metadata=False,
                      sync=False, reset_sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    :param collection: the collection object whose data is removed.
    :param keep_metadata: when true, linkages, permissions, the collection
        row and its index entry are left in place.
    :param sync: forwarded to ``index.delete_collection``.
    :param reset_sync: forwarded to ``reset_collection``.
    """
    reset_collection(collection, sync=reset_sync)
    # Reuse an existing deletion timestamp so every call below gets the same
    # value.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Entity.delete_by_collection(collection.id, deleted_at=deleted_at)
    Mapping.delete_by_collection(collection.id, deleted_at=deleted_at)
    Diagram.delete_by_collection(collection.id, deleted_at=deleted_at)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        # Considering this metadata for now, might be wrong:
        Linkage.delete_by_collection(collection.id)

        Permission.delete_by_collection(collection.id, deleted_at=deleted_at)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # The index entry and the authz flush are handled only after the commit.
    if not keep_metadata:
        index.delete_collection(collection.id, sync=sync)
        Authz.flush()
    refresh_collection(collection.id, sync=True)
Пример #38
0
    def crawl_item(self, item, sources, source):
        """Import one item into the source named by its metadata.

        The source's foreign ID is popped from ``item.meta['source']`` and
        falls back to ``source``. A source that is not yet in ``sources`` is
        created through ``self.create_source`` and stored there; its
        ``public`` and ``users`` flags grant read access to
        ``Role.SYSTEM_GUEST`` and ``Role.SYSTEM_USER`` respectively.

        Raises ``ValueError`` when no foreign ID can be determined.
        """
        source_data = item.meta.get('source', {})
        source_id = source_data.pop('foreign_id', source)
        if source_id is None:
            raise ValueError("No foreign_id for source given: %r" % item)

        if source_id not in sources:
            created = self.create_source(
                foreign_id=source_id,
                label=source_data.get('label', source_id))
            sources[source_id] = created
            # Read-only grants: the trailing flags are (read, write).
            if source_data.get('public'):
                Permission.grant_foreign(created, Role.SYSTEM_GUEST,
                                         True, False)
            if source_data.get('users'):
                Permission.grant_foreign(created, Role.SYSTEM_USER,
                                         True, False)

        log.info('Import: %r', item.identifier)
        meta = self.normalize_metadata(item)
        self.emit_file(sources[source_id], meta, item.data_path)
Пример #39
0
def delete_collection_content(collection_id):
    """Remove a collection and everything that hangs off it.

    When no collection has the given ID an error is logged and nothing else
    happens. Otherwise entities, matches and permissions are deleted by
    collection, the collection and its entities are dropped from the index,
    the collection is deleted and the session is committed.
    """
    # Every dependent object is removed explicitly, one step at a time.
    lookup = db.session.query(Collection).filter(
        Collection.id == collection_id)
    collection = lookup.first()
    if collection is None:
        log.error("No collection with ID: %r", collection_id)
        return

    log.info("Deleting collection [%r]: %r", collection.id, collection.label)
    # An earlier deletion timestamp wins over "now".
    stamp = collection.deleted_at or datetime.utcnow()
    for model in (Entity, Match, Permission):
        model.delete_by_collection(collection_id, deleted_at=stamp)
    index.delete_collection(collection_id)
    index.delete_entities(collection_id)
    collection.delete(deleted_at=stamp)
    db.session.commit()
Пример #40
0
def update_permission(role, collection, read, write, editor_id=None):
    """Set a role's access to a collection and publish the change.

    The permission found before the update is compared with the one returned
    by ``Permission.grant``. Gaining read access publishes
    ``PUBLISH_COLLECTION`` (on the global channel) for public roles and
    ``GRANT_COLLECTION`` otherwise; losing it publishes
    ``REVOKE_COLLECTION``. The session is then committed, ``Authz.flush()``
    is called and the new permission is returned.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)

    params = {'role': role, 'collection': collection}
    had_read = pre is not None and pre.read
    if not had_read:
        if post.read:
            if role.is_public:
                publish(Events.PUBLISH_COLLECTION,
                        actor_id=editor_id,
                        params=params,
                        channels=[Notification.GLOBAL])
            else:
                publish(Events.GRANT_COLLECTION,
                        actor_id=editor_id,
                        params=params)
    elif not post.read:
        publish(Events.REVOKE_COLLECTION, actor_id=editor_id, params=params)

    db.session.commit()
    Authz.flush()
    return post
Пример #41
0
    def crawl_source(self, source):
        """Synchronise one OpenNames source with its collection.

        Nothing happens for sources listed in ``IGNORE_SOURCES``. Otherwise
        the collection keyed by the source's JSON URL is fetched, read access
        is granted to ``Role.SYSTEM_GUEST``, every downloaded entity is saved
        with ``merge=True`` and entities that were not saved in this run are
        deleted. The collection is emitted together with the collected terms.
        """
        opennames_id = source.get('source_id')
        if opennames_id in IGNORE_SOURCES:
            return

        json_file = source.get('data', {}).get('json')
        url = urljoin(JSON_PATH, json_file)
        label = '%s - %s' % (source.get('publisher'),
                             source.get('source') or opennames_id)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        seen_ids = []
        db.session.flush()
        scheme = 'opennames:%s' % opennames_id
        for item in requests.get(url).json().get('entities', []):
            identifier = {'scheme': scheme, 'identifier': item.get('uid')}
            doc = {
                'identifiers': [identifier],
                'other_names': [],
                'name': item.get('name'),
                '$schema': SCHEMA.get(item.get('type'),
                                      '/entity/entity.json#')
            }
            # Each alias dict is renamed in place: 'other_name' -> 'name'.
            for other in item.get('other_names', []):
                other['name'] = other.pop('other_name', None)
                doc['other_names'].append(other)

            ent = Entity.save(doc, collection_id=collection.id, merge=True)
            db.session.flush()
            terms.update(ent.terms)
            seen_ids.append(ent.id)
            log.info("  # %s", ent.name)

        # Drop whatever the collection holds that this run did not touch.
        for leftover in collection.entities:
            if leftover.id in seen_ids:
                continue
            leftover.delete()

        self.emit_collection(collection, terms)
Пример #42
0
    def crawl_source(self, source):
        """Load one OpenNames source into a collection, keyed by entity UID.

        Sources listed in ``IGNORE_SOURCES`` and entities without a name are
        skipped. Each entity is looked up or created through
        ``Entity.by_foreign_id`` with its alternative names and identity
        numbers as selectors. Entities of the collection that were not part
        of this run are deleted before the collection is emitted.
        """
        source_id = source.get('source_id')
        if source_id in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))
        name = source.get('source') or source_id
        label = '%s - %s' % (source.get('publisher'), name)
        collection = Collection.by_foreign_id(url, {'label': label})
        Permission.grant_foreign(collection, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", collection.label)
        terms = set()
        kept_ids = []
        db.session.flush()
        for record in requests.get(url).json().get('entities', []):
            if record.get('name') is None:
                continue
            # Alias values are taken as they come; identity numbers only
            # when they are truthy.
            selectors = [alias.get('other_name')
                         for alias in record.get('other_names', [])]
            selectors.extend(
                identity.get('number')
                for identity in record.get('identities', [])
                if identity.get('number'))

            ent = Entity.by_foreign_id(record.get('uid'), collection, {
                'name': record.get('name'),
                'category': CATEGORIES.get(record.get('type'), OTHER),
                'data': record,
                'selectors': selectors
            })
            terms.update(ent.terms)
            kept_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)

        for existing in collection.entities:
            if existing.id in kept_ids:
                continue
            existing.delete()
        self.emit_collection(collection, terms)
Пример #43
0
def delete_collection(collection, keep_metadata=False, sync=False):
    """Delete the contents of a collection and, optionally, the collection.

    :param collection: the collection object whose data is removed.
    :param keep_metadata: when true, permissions, the collection row and its
        index entry are left in place.
    :param sync: forwarded to the notification, entity index and xref index
        clean-up calls.
    """
    cancel_queue(collection)
    aggregator = get_aggregator(collection)
    aggregator.drop()
    flush_notifications(collection, sync=sync)
    index.delete_entities(collection.id, sync=sync)
    xref_index.delete_xref(collection, sync=sync)
    # Reuse an existing deletion timestamp where the collection has one.
    deleted_at = collection.deleted_at or datetime.utcnow()
    Mapping.delete_by_collection(collection.id)
    EntitySet.delete_by_collection(collection.id, deleted_at)
    Entity.delete_by_collection(collection.id)
    Document.delete_by_collection(collection.id)
    if not keep_metadata:
        Permission.delete_by_collection(collection.id)
        collection.delete(deleted_at=deleted_at)
    db.session.commit()
    # The index entry and the authz flush are handled only after the commit.
    if not keep_metadata:
        index.delete_collection(collection.id, sync=True)
        Authz.flush()
    refresh_collection(collection.id)
Пример #44
0
def update_permission(role, collection, read, write, editor_id=None):
    """Set a role's access to a collection and announce new read access.

    When the role gains read access an event is published:
    ``PUBLISH_COLLECTION`` on the global channel if the role's foreign ID is
    ``Role.SYSTEM_GUEST``, otherwise ``GRANT_COLLECTION`` on the role's own
    channel. The session is then committed, ``Authz.flush()`` and
    ``refresh_role(role)`` are called and the new permission is returned.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)
    params = {'role': role, 'collection': collection}

    previously_readable = pre is not None and pre.read
    if not previously_readable and post.read:
        if role.foreign_id == Role.SYSTEM_GUEST:
            event = Events.PUBLISH_COLLECTION
            channels = [Notification.GLOBAL]
        else:
            event = Events.GRANT_COLLECTION
            channels = [role]
        publish(event,
                actor_id=editor_id,
                params=params,
                channels=channels)

    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post
Пример #45
0
    def crawl_item(self, item):
        """Ingest one item into the collection named by its metadata.

        The collection's foreign ID is popped from ``item.meta['source']``.
        A collection that is not yet cached in ``self.collections`` is
        created; when its metadata carries a truthy ``public`` flag,
        ``Role.SYSTEM_GUEST`` is granted read (but not write) access. The
        session is committed after creation and the item's file is handed to
        ``ingest_file`` with ``move=False``.

        Raises ``ValueError`` when the metadata carries no ``foreign_id``.
        """
        coll_data = item.meta.get('source', {})
        # A default is required here: without it a missing key raises a bare
        # KeyError and the explicit check below can never report it.
        coll_fk = coll_data.pop('foreign_id', None)
        if coll_fk is None:
            raise ValueError("No foreign_id for collection given: %r" % item)
        if coll_fk not in self.collections:
            label = coll_data.get('label', coll_fk)
            self.collections[coll_fk] = Collection.create({
                'foreign_id': coll_fk,
                'label': label
            })
            if coll_data.get('public'):
                Permission.grant_foreign(self.collections[coll_fk],
                                         Role.SYSTEM_GUEST,
                                         True, False)
            db.session.commit()

        log.info('Import: %r', item.identifier)
        meta = self.normalize_metadata(item)
        ingest_file(self.collections[coll_fk].id, meta,
                    item.data_path, move=False)
Пример #46
0
Файл: mf.py Проект: 01-/aleph
    def crawl_item(self, item, sources, source):
        """Emit one item's file into the source its metadata points at.

        The foreign ID comes from ``item.meta['source']`` (the key is popped)
        with ``source`` as the fallback. Unknown sources are created via
        ``self.create_source`` and cached in ``sources``; the ``public`` and
        ``users`` metadata flags grant read access to ``Role.SYSTEM_GUEST``
        and ``Role.SYSTEM_USER`` respectively.

        Raises ``ValueError`` when no foreign ID can be determined.
        """
        source_data = item.meta.get('source', {})
        source_id = source_data.pop('foreign_id', source)
        if source_id is None:
            raise ValueError("No foreign_id for source given: %r" % item)

        if source_id not in sources:
            new_source = self.create_source(
                foreign_id=source_id,
                label=source_data.get('label', source_id))
            sources[source_id] = new_source
            # The trailing flags are (read, write): read-only in both cases.
            if source_data.get('public'):
                Permission.grant_foreign(new_source,
                                         Role.SYSTEM_GUEST,
                                         True, False)
            if source_data.get('users'):
                Permission.grant_foreign(new_source,
                                         Role.SYSTEM_USER,
                                         True, False)

        log.info('Import: %r', item.identifier)
        meta = self.normalize_metadata(item)
        self.emit_file(sources[source_id], meta, item.data_path)
Пример #47
0
def permissions_update(collection):
    """Grant a role access to ``collection`` from the request payload.

    ``authz.require`` is called with ``authz.collection_write(collection)``
    and the payload is validated against ``permission.json#``. The role is
    looked up by the payload's ``role`` value; ``BadRequest`` is raised when
    it does not exist. After the grant is committed the request is passed to
    ``log_event`` and the updated permission is returned as JSON.
    """
    authz.require(authz.collection_write(collection))
    payload = request_data()
    validate(payload, "permission.json#")

    role_query = Role.all().filter(Role.id == payload["role"])
    role = role_query.first()
    if role is None:
        raise BadRequest()

    updated = Permission.grant_collection(
        collection, role, payload["read"], payload["write"])
    db.session.commit()
    log_event(request)
    return jsonify({"status": "ok", "updated": updated})
Пример #48
0
    def crawl_source(self, source):
        """Synchronise one OpenNames source with its watchlist.

        Sources listed in ``IGNORE_SOURCES`` and entities without a name are
        skipped. Each entity is looked up or created through
        ``Entity.by_foreign_id`` with its alternative names and identity
        numbers as selectors; entities not seen in this run are removed via
        ``watchlist.delete_entities``. The watchlist is emitted with the
        symmetric difference between its previous terms and the terms
        collected in this run.
        """
        source_id = source.get('source_id')
        if source_id in IGNORE_SOURCES:
            return

        url = urljoin(JSON_PATH, source.get('data', {}).get('json'))

        watchlist = Watchlist.by_foreign_id(url, {'label': source_id})
        Permission.grant_foreign(watchlist, Role.SYSTEM_GUEST, True, False)
        log.info(" > OpenNames collection: %s", watchlist.label)
        # Read the terms before any entity is touched in this run.
        terms_before = watchlist.terms
        terms_after = set()
        kept_ids = []
        db.session.flush()
        for record in requests.get(url).json().get('entities', []):
            if record.get('name') is None:
                continue
            selectors = [alias.get('other_name')
                         for alias in record.get('other_names', [])]
            selectors.extend(
                identity.get('number')
                for identity in record.get('identities', [])
                if identity.get('number'))
            ent = Entity.by_foreign_id(record.get('uid'), watchlist, {
                'name': record.get('name'),
                'category': CATEGORIES.get(record.get('type'), OTHER),
                'data': record,
                'selectors': selectors
            })
            terms_after.update(ent.terms)
            kept_ids.append(ent.id)
            log.info("  # %s (%s)", ent.name, ent.category)
        watchlist.delete_entities(spare=kept_ids)
        changed = terms_before.symmetric_difference(terms_after)
        self.emit_watchlist(watchlist, changed)
Пример #49
0
def update_permission(role, collection, read, write, editor_id=None):
    """Set a role's access to a collection and publish the change.

    Read access before and after the update is compared. Gaining it
    publishes ``PUBLISH_COLLECTION`` (on the global channel) for public
    roles and ``GRANT_COLLECTION`` otherwise; losing it publishes
    ``REVOKE_COLLECTION``. The session is then committed, ``Authz.flush()``
    and ``refresh_role(role)`` are called and the new permission is
    returned.
    """
    pre = Permission.by_collection_role(collection, role)
    post = Permission.grant(collection, role, read, write)

    params = {'role': role, 'collection': collection}
    readable_before = bool(pre is not None and pre.read)
    readable_after = bool(post.read)
    if readable_after and not readable_before:
        if role.is_public:
            publish(Events.PUBLISH_COLLECTION,
                    actor_id=editor_id,
                    params=params,
                    channels=[Notification.GLOBAL])
        else:
            publish(Events.GRANT_COLLECTION,
                    actor_id=editor_id,
                    params=params)
    elif readable_before and not readable_after:
        publish(Events.REVOKE_COLLECTION,
                actor_id=editor_id,
                params=params)

    db.session.commit()
    Authz.flush()
    refresh_role(role)
    return post
Пример #50
0
Файл: authz.py Проект: 01-/aleph
def sources(action):
    """Return the source IDs the current request may use for ``action``.

    The per-action ID sets are computed once and stored on ``request`` as
    ``auth_sources``. Admins get every ID from ``Source.all_ids()`` for both
    actions; everyone else gets the resources of the source permissions held
    by ``request.auth_roles``, with write access recorded only when
    ``request.logged_in`` is truthy. Unknown actions yield an empty list.
    """
    if not hasattr(request, 'auth_sources'):
        readable, writable = set(), set()
        # Attach the sets before filling them; the code below keeps adding
        # to these very objects.
        request.auth_sources = {READ: readable, WRITE: writable}
        if is_admin():
            for (source_id,) in Source.all_ids():
                readable.add(source_id)
                writable.add(source_id)
        else:
            granted = (Permission.all()
                       .filter(Permission.role_id.in_(request.auth_roles))
                       .filter(Permission.resource_type == Permission.SOURCE))
            for perm in granted:
                if perm.read:
                    readable.add(perm.resource_id)
                if perm.write and request.logged_in:
                    writable.add(perm.resource_id)
    return list(request.auth_sources.get(action, []))
Пример #51
0
def collections(action):
    """Return the collection IDs the current request may use for ``action``.

    The per-action ID sets are computed once and stored on ``request`` as
    ``auth_collections``. Admins get every non-deleted collection ID for
    both actions; everyone else gets the collections of the permissions held
    by ``request.auth_roles``. Either a read or a write permission counts as
    read access; write access is recorded only when ``request.logged_in`` is
    truthy. Unknown actions yield an empty list.
    """
    if not hasattr(request, 'auth_collections'):
        readable, writable = set(), set()
        # Attach the sets before filling them; the code below keeps adding
        # to these very objects.
        request.auth_collections = {READ: readable, WRITE: writable}
        if is_admin():
            live = Collection.all_ids().filter(Collection.deleted_at == None)  # noqa
            for (col_id,) in live:
                readable.add(col_id)
                writable.add(col_id)
        else:
            granted = Permission.all()
            granted = granted.filter(
                Permission.role_id.in_(request.auth_roles))
            granted = granted.filter(Permission.collection_id != None)  # noqa
            for perm in granted:
                if perm.read or perm.write:
                    readable.add(perm.collection_id)
                if perm.write and request.logged_in:
                    writable.add(perm.collection_id)
    return list(request.auth_collections.get(action, []))
Пример #52
0
def permissions_update(collection):
    """Grant a role access to ``collection`` from the request payload.

    ``authz.require`` is called with ``authz.collection_write(collection)``
    and the payload is validated against ``permission.json#``. The role is
    looked up by the payload's ``role`` value; ``BadRequest`` is raised when
    it does not exist. The grant is made on ``collection.id``, committed and
    returned as JSON.
    """
    authz.require(authz.collection_write(collection))
    payload = request_data()
    validate(payload, 'permission.json#')

    role_query = Role.all().filter(Role.id == payload['role'])
    role = role_query.first()
    if role is None:
        raise BadRequest()

    updated = Permission.grant_collection(
        collection.id, role, payload['read'], payload['write'])
    db.session.commit()
    return jsonify({'status': 'ok', 'updated': updated})
Пример #53
0
 def setUp(self):
     """Create a test source with a read/write permission on it.

     The source is flushed first so that its ``id`` can be used as the
     permission's ``resource_id``; both rows are committed at the end.
     """
     super(SourcesApiTestCase, self).setUp()
     source = Source()
     source.foreign_id = "test"
     source.label = "Test Collection"
     source.category = "news"
     self.source = source
     db.session.add(source)
     db.session.flush()

     grant = Permission()
     grant.role_id = Role.system(Role.SYSTEM_USER)
     grant.resource_type = Permission.SOURCE
     grant.resource_id = source.id
     grant.read = True
     grant.write = True
     db.session.add(grant)
     db.session.commit()
Пример #54
0
def permissions_save(watchlist=None, source=None):
    """Grant a role access to a watchlist or a source from the payload.

    A truthy ``watchlist`` selects the watchlist as the target resource;
    otherwise ``source`` is used. The payload is validated against
    ``permissions_schema`` and the role is looked up by the payload's
    ``role`` value; ``BadRequest`` is raised when it does not exist. The
    grant is committed and returned as JSON.
    """
    # NOTE(review): when neither argument is given, neither check below
    # runs -- confirm that callers always supply one of them.
    if watchlist is not None:
        authz.require(authz.watchlist_write(watchlist))
    if source is not None:
        authz.require(authz.source_write(source))

    if watchlist:
        resource_type, resource_id = Permission.WATCHLIST, watchlist
    else:
        resource_type, resource_id = Permission.SOURCE, source
    payload = request_data()
    validate(payload, permissions_schema)

    role_query = db.session.query(Role).filter(Role.id == payload['role'])
    role = role_query.first()
    if role is None:
        raise BadRequest()

    updated = Permission.grant_resource(resource_type, resource_id, role,
                                        payload['read'], payload['write'])
    db.session.commit()
    return jsonify({'status': 'ok', 'updated': updated})
Пример #55
0
Файл: util.py Проект: pudo/aleph
 def grant(self, collection, role, read, write):
     """Grant ``role`` the given access to ``collection`` and commit.

     After the commit the collection is passed to ``update_collection``.
     """
     Permission.grant(collection, role, read, write)
     db.session.commit()
     update_collection(collection)