Пример #1
0
class Subscription(db.Model, IdModel, SoftDeleteModel):
    """Soft-deletable link between a role and a notification channel."""

    channel = db.Column(db.String(255), index=True)
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(Role)

    @classmethod
    def find(cls, channel=None, role_id=None, deleted=False):
        """Return the first subscription matching the given filters.

        :param channel: Channel name to match; ignored when ``None``.
        :param role_id: Role ID to match; ignored when ``None``.
        :param deleted: Passed through to ``cls.all()``.
        :return: The first matching subscription, or ``None``.
        """
        q = cls.all(deleted=deleted)
        if channel is not None:
            q = q.filter(cls.channel == channel)
        if role_id is not None:
            q = q.filter(cls.role_id == role_id)
        return q.first()

    @classmethod
    def subscribe(cls, role, channel):
        """Ensure ``role`` has an active subscription to ``channel``.

        A subscription returned by ``find()`` is re-used, otherwise a new
        one is created. The object is added to the session; flushing and
        committing are left to the caller.

        :return: The subscription object.
        """
        subscription = cls.find(channel=channel, role_id=role.id)
        if subscription is None:
            subscription = cls()
        subscription.channel = channel
        subscription.role_id = role.id
        subscription.deleted_at = None
        db.session.add(subscription)
        return subscription

    @classmethod
    def unsubscribe(cls, role=None, channel=None, deleted_at=None):
        """Soft-delete the subscriptions of a role and/or on a channel.

        :param role: Restrict to subscriptions held by this role.
        :param channel: Restrict to subscriptions on this channel.
        :param deleted_at: Timestamp to store; defaults to the current
            UTC time.
        :raises AssertionError: If neither ``role`` nor ``channel`` is given.
        """
        # Checked explicitly instead of with ``assert``: assert statements
        # are stripped under ``python -O``, and without any filter the
        # UPDATE below would mark every subscription as deleted.
        if role is None and channel is None:
            raise AssertionError('unsubscribe() needs a role or a channel')
        if deleted_at is None:
            deleted_at = datetime.utcnow()
        q = db.session.query(cls)
        if role is not None:
            q = q.filter(cls.role_id == role.id)
        if channel is not None:
            q = q.filter(cls.channel == channel)
        # Bulk UPDATE; objects already loaded in the session are not
        # refreshed (synchronize_session=False).
        q.update({cls.deleted_at: deleted_at},
                 synchronize_session=False)
Пример #2
0
class Collection(db.Model, IdModel, SoftDeleteModel):
    """A set of documents and entities against which access control is
    enforced."""

    # Category schema for collections.
    # TODO: should this be configurable?
    CATEGORIES = {
        'news': lazy_gettext('News archives'),
        'leak': lazy_gettext('Leaks'),
        'land': lazy_gettext('Land registry'),
        'gazette': lazy_gettext('Gazettes'),
        'court': lazy_gettext('Court archives'),
        'company': lazy_gettext('Company registries'),
        'sanctions': lazy_gettext('Sanctions lists'),
        'procurement': lazy_gettext('Procurement'),
        'finance': lazy_gettext('Financial records'),
        'grey': lazy_gettext('Grey literature'),
        'library': lazy_gettext('Document libraries'),
        'license': lazy_gettext('Licenses and concessions'),
        'regulatory': lazy_gettext('Regulatory filings'),
        'poi': lazy_gettext('Persons of interest'),
        'customs': lazy_gettext('Customs declarations'),
        'census': lazy_gettext('Population census'),
        'transport': lazy_gettext('Air and maritime registers'),
        'other': lazy_gettext('Other material')
    }

    # Category reported by ``to_dict`` when the stored one is not a key of
    # CATEGORIES, and assigned to newly created collections.
    DEFAULT = 'other'

    label = db.Column(db.Unicode)
    summary = db.Column(db.Unicode, nullable=True)
    category = db.Column(db.Unicode, nullable=True)
    countries = db.Column(ARRAY(db.Unicode()), default=[])
    languages = db.Column(ARRAY(db.Unicode()), default=[])
    foreign_id = db.Column(db.Unicode, unique=True, nullable=False)
    publisher = db.Column(db.Unicode, nullable=True)
    publisher_url = db.Column(db.Unicode, nullable=True)
    info_url = db.Column(db.Unicode, nullable=True)
    data_url = db.Column(db.Unicode, nullable=True)

    # A casefile is a type of collection which is used to manage the state
    # of an investigation. Unlike normal collections, cases do not serve
    # as source material, but as a mechanism of analysis.
    casefile = db.Column(db.Boolean, default=False)

    creator_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    creator = db.relationship(Role)

    def touch(self):
        """Set ``updated_at`` to the current UTC time and add to the session."""
        # https://www.youtube.com/watch?v=wv-34w8kGPM
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def update(self, data, authz):
        """Apply the fields present in ``data`` to this collection.

        Keys missing from ``data`` leave the current value in place.

        :param data: Mapping of field names to new values.
        :param authz: Authorization object; ``category``, ``casefile`` and
            the creator are only changed when ``authz.is_admin`` is true.
        """
        self.label = data.get('label', self.label)
        self.summary = data.get('summary', self.summary)
        self.publisher = data.get('publisher', self.publisher)
        self.publisher_url = data.get('publisher_url', self.publisher_url)
        self.info_url = data.get('info_url', self.info_url)
        self.data_url = data.get('data_url', self.data_url)
        self.countries = ensure_list(data.get('countries', self.countries))
        self.languages = ensure_list(data.get('languages', self.languages))

        # Some fields are editable only by admins in order to have
        # a strict separation between source evidence and case
        # material.
        if authz.is_admin:
            self.category = data.get('category', self.category)
            self.casefile = as_bool(data.get('casefile'),
                                    default=self.casefile)
            creator = Role.by_id(data.get('creator_id'))
            if creator is not None:
                self.creator = creator

        self.touch()
        db.session.flush()
        if self.creator is not None:
            # NOTE(review): the two flags are presumably read and write
            # access - confirm against Permission.grant.
            Permission.grant(self, self.creator, True, True)

    @property
    def team_id(self):
        """Stringified IDs of non-system roles with read access.

        Soft-deleted roles and soft-deleted permissions are excluded.
        """
        role = aliased(Role)
        perm = aliased(Permission)
        q = db.session.query(role.id)
        q = q.filter(role.type != Role.SYSTEM)
        q = q.filter(role.id == perm.role_id)
        q = q.filter(perm.collection_id == self.id)
        q = q.filter(perm.read == True)  # noqa
        q = q.filter(role.deleted_at == None)  # noqa
        q = q.filter(perm.deleted_at == None)  # noqa
        return [stringify(i) for (i, ) in q.all()]

    @property
    def secret(self):
        """``True`` when no public role holds an active read permission."""
        q = db.session.query(Permission.id)
        q = q.filter(Permission.role_id.in_(Role.public_roles()))
        q = q.filter(Permission.collection_id == self.id)
        q = q.filter(Permission.read == True)  # noqa
        q = q.filter(Permission.deleted_at == None)  # noqa
        return q.count() < 1

    @property
    def ns(self):
        """``Namespace`` built from ``foreign_id``, cached on the instance."""
        if not hasattr(self, '_ns'):
            self._ns = Namespace(self.foreign_id)
        return self._ns

    def to_dict(self):
        """Serialize the collection into a plain dictionary.

        ``category`` falls back to ``DEFAULT`` when the stored value is not
        a key of ``CATEGORIES``; ``kind`` is ``'casefile'`` or ``'source'``
        depending on the ``casefile`` flag.
        """
        data = self.to_dict_dates()
        data['category'] = self.DEFAULT
        if self.category in self.CATEGORIES:
            data['category'] = self.category
        data['kind'] = 'casefile' if self.casefile else 'source'
        data.update({
            'id': stringify(self.id),
            'collection_id': stringify(self.id),
            'foreign_id': self.foreign_id,
            'creator_id': stringify(self.creator_id),
            'team_id': self.team_id,
            'label': self.label,
            'summary': self.summary,
            'publisher': self.publisher,
            'publisher_url': self.publisher_url,
            'info_url': self.info_url,
            'data_url': self.data_url,
            'casefile': self.casefile,
            'secret': self.secret
        })
        return data

    @classmethod
    def by_foreign_id(cls, foreign_id, deleted=False):
        """Return the collection with ``foreign_id``, or ``None``.

        :param foreign_id: Foreign ID to look up; ``None`` yields ``None``
            without querying.
        :param deleted: Passed through to ``cls.all()``.
        """
        if foreign_id is None:
            return
        q = cls.all(deleted=deleted)
        return q.filter(cls.foreign_id == foreign_id).first()

    @classmethod
    def _apply_authz(cls, q, authz):
        """Restrict ``q`` to collections readable by the roles in ``authz``.

        The query is returned unchanged when ``authz`` is ``None`` or
        belongs to an admin.
        """
        if authz is not None and not authz.is_admin:
            q = q.join(Permission, cls.id == Permission.collection_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            q = q.filter(Permission.role_id.in_(authz.roles))
        return q

    @classmethod
    def all_authz(cls, authz, deleted=False):
        """Query all collections, filtered through ``_apply_authz``."""
        q = super(Collection, cls).all(deleted=deleted)
        return cls._apply_authz(q, authz)

    @classmethod
    def all_by_ids(cls, ids, deleted=False, authz=None):
        """Query collections by ID, filtered through ``_apply_authz``."""
        q = super(Collection, cls).all_by_ids(ids, deleted=deleted)
        return cls._apply_authz(q, authz)

    @classmethod
    def create(cls, data, authz, created_at=None):
        """Create a collection, or update and restore an existing one.

        The lookup uses ``data['foreign_id']`` (a generated text ID when
        absent or empty) and includes soft-deleted collections. A new
        collection starts as a casefile in the default category with
        ``authz.id`` as creator. In both cases ``update()`` is applied and
        ``deleted_at`` is cleared.

        :param data: Mapping of field values, see ``update()``.
        :param authz: Authorization object of the acting role.
        :param created_at: Creation timestamp stored on a new collection.
        :return: The collection.
        """
        foreign_id = data.get('foreign_id') or make_textid()
        collection = cls.by_foreign_id(foreign_id, deleted=True)
        if collection is None:
            collection = cls()
            collection.created_at = created_at
            collection.foreign_id = foreign_id
            collection.category = cls.DEFAULT
            collection.casefile = True
            collection.creator_id = authz.id
        collection.update(data, authz)
        collection.deleted_at = None
        return collection

    def __repr__(self):
        fmt = '<Collection(%r, %r, %r)>'
        return fmt % (self.id, self.foreign_id, self.label)
Пример #3
0
    @classmethod
    def all_system(cls):
        """Return a query over the roles whose type is ``Role.SYSTEM``."""
        query = cls.all()
        return query.filter(Role.type == Role.SYSTEM)

    def set_password(self, secret):
        """Store a hash of ``secret`` as the role's password digest.

        :param str secret: The plain-text password to hash.
        """
        digest = generate_password_hash(secret)
        self.password_digest = digest

    def check_password(self, secret):
        """Compare ``secret`` against the stored password digest.

        A missing or empty digest is passed to the check as an empty string.

        :param str secret: The password to verify.
        :rtype: bool
        """
        stored = self.password_digest
        if not stored:
            stored = ''
        return check_password_hash(stored, secret)

    def __repr__(self):
        """Debug representation showing the role's id and foreign_id."""
        fields = (self.id, self.foreign_id)
        return '<Role(%r,%r)>' % fields


# Self-referential many-to-many relationship between roles through the
# ``membership`` table. The owning role is matched on
# ``membership.c.group_id`` and the related roles on
# ``membership.c.member_id``, so ``members`` lists the roles belonging to a
# group; the ``roles`` backref is the reverse direction.
Role.members = db.relationship(Role,
                               secondary=membership,
                               primaryjoin=Role.id == membership.c.group_id,
                               secondaryjoin=Role.id == membership.c.member_id,
                               backref="roles")
Пример #4
0
class Notification(db.Model, IdModel, DatedModel):
    """An event published to a set of channels, optionally by an actor."""

    GLOBAL = 'Global'

    _event = db.Column('event', db.String(255), nullable=False)
    channels = db.Column(ARRAY(db.String(255)), index=True)
    params = db.Column(JSONB)

    actor_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    actor = db.relationship(Role)

    @hybrid_property
    def event(self):
        """Event looked up from the stored event name via ``Events.get``."""
        return Events.get(self._event)

    @event.setter
    def event(self, event):
        # Only the event's name is persisted.
        self._event = event.name

    @property
    def recipients(self):
        """Query of distinct roles subscribed to any of this notification's
        channels.

        Only roles with an e-mail address are included; soft-deleted roles
        and soft-deleted subscriptions are excluded.
        """
        q = db.session.query(Role)
        q = q.join(Subscription, Subscription.role_id == Role.id)
        q = q.filter(Subscription.channel.in_(self.channels))
        q = q.filter(Role.email != None)  # noqa
        q = q.filter(Role.deleted_at == None)  # noqa
        q = q.filter(Subscription.deleted_at == None)  # noqa
        q = q.distinct()
        return q

    def iterparams(self):
        """Yield ``(name, class, value)`` for the actor and event parameters.

        The actor, when set, is yielded first as ``('actor', Role,
        actor_id)``. Parameters declared by the event follow, skipping
        those without a stored value. Nothing more is yielded when the
        event cannot be resolved.
        """
        if self.actor_id is not None:
            yield 'actor', Role, self.actor_id
        if self.event is None:
            return
        # ``params`` is a nullable column; treat NULL like an empty mapping
        # instead of failing on ``None.get``.
        params = self.params or {}
        for name, clazz in self.event.params.items():
            value = params.get(name)
            if value is not None:
                yield name, clazz, value

    @classmethod
    def publish(cls, event, actor_id=None, channels=None, params=None):
        """Create a notification and add it to the session.

        :param event: Event object; its ``name`` is stored.
        :param actor_id: ID of the role that triggered the event, if any.
        :param channels: Iterable of channel names; ``None`` entries and
            duplicates are dropped. Defaults to no channels.
        :param params: Mapping of event parameters. Defaults to a fresh
            empty dict.
        :return: The new notification (not flushed or committed here).
        """
        # ``None`` defaults replace the former ``[]`` / ``{}`` literals: the
        # default dict was stored on the instance, so one mutable object
        # was shared by every notification published without parameters.
        if channels is None:
            channels = []
        if params is None:
            params = {}
        notf = cls()
        notf.event = event
        notf.actor_id = actor_id
        notf.params = params
        notf.channels = list({c for c in channels if c is not None})
        db.session.add(notf)
        return notf

    @classmethod
    def by_role(cls, role, since=None):
        """Query notifications on channels that ``role`` is subscribed to.

        Notifications whose actor is ``role`` itself are excluded, as are
        events whose name is not in ``Events.names()``. Results are
        limited to those created at or after ``since`` and at or after
        ``role.notified_at`` (each when set), newest first.
        """
        # CTE holding all of the role's active channels as a single array.
        columns = array_agg(Subscription.channel).label('channels')
        sq = db.session.query(columns)
        sq = sq.filter(Subscription.deleted_at == None)  # noqa
        sq = sq.filter(Subscription.role_id == role.id)
        sq = sq.cte('sq')
        q = cls.all()
        q = q.filter(or_(
            cls.actor_id != role.id,
            cls.actor_id == None  # noqa
        ))
        q = q.filter(cls.channels.overlap(sq.c.channels))
        q = q.filter(cls._event.in_(Events.names()))
        if since is not None:
            q = q.filter(cls.created_at >= since)
        if role.notified_at is not None:
            q = q.filter(cls.created_at >= role.notified_at)
        q = q.order_by(cls.created_at.desc())
        q = q.order_by(cls.id.desc())
        return q

    @classmethod
    def by_channel(cls, channel):
        """Query notifications published to ``channel``, newest first.

        Events whose name is not in ``Events.names()`` are excluded.
        """
        q = cls.all()
        q = q.filter(cls.channels.any(channel))
        q = q.filter(cls._event.in_(Events.names()))
        q = q.order_by(cls.created_at.desc())
        q = q.order_by(cls.id.desc())
        return q
Пример #5
0
class EntitySetItem(db.Model, SoftDeleteModel):
    """Membership of an entity in an entity set, with a judgement."""

    __tablename__ = "entityset_item"

    id = db.Column(db.Integer, primary_key=True)
    entityset_id = db.Column(db.String(ENTITY_ID_LEN),
                             db.ForeignKey("entityset.id"),
                             index=True)
    entity_id = db.Column(db.String(ENTITY_ID_LEN), index=True)
    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True)

    compared_to_entity_id = db.Column(db.String(ENTITY_ID_LEN))
    added_by_id = db.Column(db.Integer, db.ForeignKey("role.id"))
    judgement = db.Column(db.Enum(Judgement))

    entityset = db.relationship(EntitySet)
    collection = db.relationship(Collection)
    added_by = db.relationship(Role)

    @classmethod
    def by_entity_id(cls, entityset, entity_id):
        """Return the most recently created item for the entity in the
        given set, or ``None``."""
        in_set = cls.entityset_id == entityset.id
        same_entity = cls.entity_id == entity_id
        newest_first = cls.created_at.desc()
        return cls.all().filter(in_set).filter(same_entity) \
            .order_by(newest_first).first()

    @classmethod
    def save(cls,
             entityset,
             entity_id,
             judgement=None,
             collection_id=None,
             **data):
        """Record a judgement about ``entity_id`` within ``entityset``.

        ``judgement`` defaults to ``Judgement.POSITIVE``; other values are
        converted with ``Judgement(...)``. An existing item carrying the
        same judgement is returned as-is; one with a different judgement
        is deleted first. ``Judgement.NO_JUDGEMENT`` creates no new item
        and returns ``None``.

        Recognised extra keys in ``data``: ``compared_to_entity_id`` and
        ``added_by_id``.
        """
        verdict = Judgement.POSITIVE if judgement is None \
            else Judgement(judgement)
        previous = cls.by_entity_id(entityset, entity_id)
        if previous is not None:
            if previous.judgement == verdict:
                return previous
            previous.delete()
        if verdict == Judgement.NO_JUDGEMENT:
            return None
        fields = dict(
            entityset_id=entityset.id,
            entity_id=entity_id,
            judgement=verdict,
            compared_to_entity_id=data.get("compared_to_entity_id"),
            collection_id=collection_id or entityset.collection_id,
            added_by_id=data.get("added_by_id"),
        )
        created = cls(**fields)
        db.session.add(created)
        return created

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete items tied to a collection.

        Two passes: items whose own ``collection_id`` matches, then items
        whose entity set belongs to the collection.
        """
        own_items = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        own_items.delete(synchronize_session=False)

        set_items = db.session.query(cls)
        set_items = set_items.filter(EntitySet.collection_id == collection_id)
        set_items = set_items.filter(EntitySet.id == cls.entityset_id)
        set_items.delete(synchronize_session=False)

    @classmethod
    def delete_by_entity(cls, entity_id):
        """Bulk-delete every item that refers to ``entity_id``."""
        matching = db.session.query(cls).filter(cls.entity_id == entity_id)
        matching.delete(synchronize_session=False)

    def to_dict(self):
        """Serialize the item; ``judgement`` is only present when set."""
        result = self.to_dict_dates()
        result["entityset_id"] = self.entityset_id
        result["entity_id"] = self.entity_id
        result["collection_id"] = self.collection_id
        result["added_by_id"] = self.added_by_id
        result["compared_to_entity_id"] = self.compared_to_entity_id
        if self.judgement:
            result["judgement"] = self.judgement.value
        return result

    def __repr__(self):
        ident = (self.entityset_id, self.entity_id)
        return "<EntitySetItem(%r, %r)>" % ident
Пример #6
0
class Role(db.Model, IdModel, SoftDeleteModel):
    """An access control subject: a user, a group or a system role."""
    __tablename__ = 'role'

    USER = '******'
    GROUP = 'group'
    SYSTEM = 'system'
    TYPES = [USER, GROUP, SYSTEM]

    SYSTEM_GUEST = 'guest'
    SYSTEM_USER = '******'

    #: Generates URL-safe signatures for invitations.
    SIGNATURE = URLSafeTimedSerializer(settings.SECRET_KEY)

    #: Signature maximum age, defaults to 1 day
    SIGNATURE_MAX_AGE = 60 * 60 * 24

    #: Password minimum length
    PASSWORD_MIN_LENGTH = 6

    foreign_id = db.Column(db.Unicode(2048), nullable=False, unique=True)
    name = db.Column(db.Unicode, nullable=False)
    email = db.Column(db.Unicode, nullable=True)
    api_key = db.Column(db.Unicode, nullable=True)
    is_admin = db.Column(db.Boolean, nullable=False, default=False)
    type = db.Column(db.Enum(*TYPES, name='role_type'), nullable=False)
    password_digest = db.Column(db.Unicode, nullable=True)
    password = None
    reset_token = db.Column(db.Unicode, nullable=True)
    permissions = db.relationship('Permission', backref='role')

    @property
    def has_password(self):
        """``True`` when a password digest is stored for this role."""
        digest = self.password_digest
        return digest is not None

    def update(self, data):
        """Apply ``name`` and, when non-empty, ``password`` from ``data``."""
        self.name = data.get('name', self.name)
        new_password = data.get('password')
        if new_password:
            self.set_password(new_password)

    def clear_roles(self):
        """Drop every group membership of this role."""
        self.roles = []
        db.session.add(self)

    def add_role(self, role):
        """Make this role a member of the existing group ``role``."""
        self.roles.append(role)
        for changed in (role, self):
            db.session.add(changed)

    @classmethod
    def notifiable(cls):
        """Query over ``cls.all_ids()`` limited to roles with an e-mail."""
        ids = cls.all_ids()
        return ids.filter(cls.email != None)  # noqa

    @classmethod
    def by_foreign_id(cls, foreign_id):
        """Return the role with ``foreign_id``; ``None`` if absent or if
        ``foreign_id`` is ``None``."""
        if foreign_id is None:
            return None
        return cls.all().filter_by(foreign_id=foreign_id).first()

    @classmethod
    def by_email(cls, email):
        """Return a query of roles with the given e-mail address.

        Unlike the other ``by_*`` lookups this returns the query itself,
        not its first row. An empty or missing ``email`` yields ``None``.
        """
        if not email:
            return None
        return cls.all().filter_by(email=email)

    @classmethod
    def by_api_key(cls, api_key):
        """Return the role owning ``api_key``; ``None`` if absent or if
        ``api_key`` is ``None``."""
        if api_key is None:
            return None
        return cls.all().filter_by(api_key=api_key).first()

    @classmethod
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Fetch the role with ``foreign_id``, creating it when missing.

        ``name`` and ``type`` are only applied to a newly created role. The
        e-mail is always overwritten with ``email`` (including ``None``),
        an API key is generated if the role has none, and ``is_admin`` is
        applied when given. A role whose e-mail is listed in
        ``settings.ADMINS`` (case-insensitive) is always made an admin.
        The role is added to the session and flushed.
        """
        role = cls.by_foreign_id(foreign_id)
        is_new = role is None
        if is_new:
            role = cls()
            role.foreign_id = foreign_id
            role.name = name
            role.type = type
            role.is_admin = False

        if role.api_key is None:
            role.api_key = make_textid()

        role.email = email
        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/alephdata/aleph/issues/111
        admin_emails = {address.lower() for address in settings.ADMINS}
        if email is not None and email.lower() in admin_emails:
            role.is_admin = True

        db.session.add(role)
        db.session.flush()
        return role

    @classmethod
    def load_id(cls, foreign_id, type=None, name=None):
        """Return the ID of the role with ``foreign_id``.

        Resolved IDs are cached on ``current_app._authz_roles``. When no
        role exists and ``type`` is given, one is created (named ``name``
        or, failing that, ``foreign_id``); without ``type`` the result is
        ``None``.
        """
        if not hasattr(current_app, '_authz_roles'):
            current_app._authz_roles = {}
        cache = current_app._authz_roles
        if foreign_id in cache:
            return cache[foreign_id]
        role = cls.by_foreign_id(foreign_id)
        if role is None:
            if type is None:
                return None
            role = cls.load_or_create(foreign_id, type, name or foreign_id)
        cache[foreign_id] = role.id
        return cache[foreign_id]

    @classmethod
    def public_roles(cls):
        """IDs of the roles that make a collection count as public."""
        return {
            cls.load_id(cls.SYSTEM_USER),
            cls.load_id(cls.SYSTEM_GUEST),
        }

    @classmethod
    def by_prefix(cls, prefix):
        """Query user roles matching ``prefix``.

        Despite the name this is a case-insensitive substring match
        against the foreign_id, e-mail address and name.

        :param str prefix: Text to search for.
        """
        users = cls.all().filter(Role.type == Role.USER)
        return users.filter(
            or_(cls.foreign_id.ilike('%' + prefix + '%'),
                cls.email.ilike('%' + prefix + '%'),
                cls.name.ilike('%' + prefix + '%')))

    @classmethod
    def all_groups(cls):
        """Query over every role whose type is not ``Role.USER``."""
        not_a_user = Role.type != Role.USER
        return cls.all().filter(not_a_user)

    def set_password(self, secret):
        """Store a hash of ``secret`` as the role's password digest.

        :param str secret: The plain-text password to hash.
        """
        digest = generate_password_hash(secret)
        self.password_digest = digest

    def check_password(self, secret):
        """Compare ``secret`` against the stored password digest.

        A missing or empty digest is passed to the check as an empty string.

        :param str secret: The password to verify.
        :rtype: bool
        """
        stored = self.password_digest or ''
        return check_password_hash(stored, secret)

    def __repr__(self):
        fields = (self.id, self.foreign_id)
        return '<Role(%r,%r)>' % fields
Пример #7
0
class Alert(db.Model, SoftDeleteModel):
    """A role's standing request for notifications about a query."""

    __tablename__ = 'alert'

    id = db.Column(db.Integer, primary_key=True)
    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    custom_label = db.Column(db.Unicode, nullable=True)
    query_text = db.Column(db.Unicode, nullable=True)
    entity_id = db.Column(db.String(32),
                          db.ForeignKey('entity.id'),
                          nullable=True)  # noqa
    entity = db.relationship(Entity,
                             backref=db.backref('alerts',
                                                lazy='dynamic'))  # noqa
    notified_at = db.Column(db.DateTime, nullable=True)

    @property
    def label(self):
        """Display label: the custom label, else the entity's name, else
        the query text."""
        custom = self.custom_label
        if custom is not None:
            return custom
        entity = self.entity
        return entity.name if entity else self.query_text

    def delete(self, deleted_at=None):
        """Soft-delete the alert, stamping ``deleted_at`` (default: the
        current UTC time), then flush."""
        if not deleted_at:
            deleted_at = datetime.utcnow()
        self.deleted_at = deleted_at
        db.session.add(self)
        db.session.flush()

    def update(self):
        """Set ``notified_at`` to the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()

    def is_same(self, other):
        """Whether ``other`` has the same role, entity and query text."""
        return bool(
            other.role_id == self.role_id
            and other.entity_id == self.entity_id
            and other.query_text == self.query_text
        )

    @classmethod
    def by_id(cls, id, role=None):
        """Return the alert with ``id``, optionally restricted to alerts
        owned by ``role``; ``None`` when there is no match."""
        found = cls.all().filter_by(id=id)
        if role is None:
            return found.first()
        return found.filter(cls.role_id == role.id).first()

    @classmethod
    def by_role(cls, role):
        """Query over the alerts owned by ``role``."""
        owned = cls.role_id == role.id
        return cls.all().filter(owned)

    @classmethod
    def create(cls, data, role):
        """Validate ``data`` and create an alert for ``role``.

        The query text is stripped and blank text is stored as ``None``;
        an empty ``entity_id`` is likewise stored as ``None``. The alert
        is flushed through ``update()``.
        """
        validate(data, 'alert.json#')
        alert = cls()
        alert.role_id = role.id
        text = data.get('query_text')
        if text is not None:
            text = text.strip() or None
        alert.query_text = text
        alert.entity_id = data.get('entity_id') or None
        alert.custom_label = data.get('label')
        alert.update()
        return alert

    @classmethod
    def exists(cls, query, role):
        """Return the ID of ``role``'s alert matching ``query``, if any.

        ``query`` supplies the text under ``q`` and entities under
        ``entity``. Blank text matches alerts without query text. The
        entity is matched only when exactly one is given; otherwise the
        alert must have no entity.
        """
        q = cls.all_ids().filter(cls.role_id == role.id)
        text = query.get('q')
        if text is not None:
            text = text.strip() or None
        q = q.filter(cls.query_text == text)
        entities = query.getlist('entity')
        if len(entities) == 1:
            q = q.filter(cls.entity_id == entities[0])
        else:
            q = q.filter(cls.entity_id == None)  # noqa
        return q.limit(1).scalar()

    @classmethod
    def dedupe(cls, entity_id):
        """Soft-delete duplicate alerts on ``entity_id``.

        An alert is deleted whenever another alert with a higher ID is the
        same according to ``is_same``.
        """
        alerts = cls.all().filter_by(entity_id=entity_id).all()
        for left in alerts:
            for right in alerts:
                if left.id < right.id and left.is_same(right):
                    left.delete()

    def __repr__(self):
        fields = (self.id, self.label)
        return '<Alert(%r, %r)>' % fields

    def to_query(self):
        """Express the alert as a ``MultiDict`` with ``q`` and ``entity``."""
        args = {
            'q': self.query_text or '',
            'entity': self.entity_id
        }
        return MultiDict(args)

    def to_dict(self):
        """Serialize the alert into a plain dictionary."""
        return dict(
            id=self.id,
            label=self.label,
            role_id=self.role_id,
            query_text=self.query_text,
            entity_id=self.entity_id,
            created_at=self.created_at,
            notified_at=self.notified_at,
            updated_at=self.updated_at,
        )
Пример #8
0
class List(db.Model):
    """A labelled list shared between users, optionally public."""

    id = db.Column(db.Integer(), primary_key=True)
    label = db.Column(db.Unicode)
    public = db.Column(db.Boolean, default=False)

    creator_id = db.Column(db.Integer(),
                           db.ForeignKey('user.id'),
                           nullable=True)
    creator = db.relationship(User)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime,
                           default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    users = db.relationship(User, secondary=list_user_table, backref='lists')

    def to_dict(self):
        """Serialize the list, including its API URLs."""
        return dict(
            id=self.id,
            api_url=url_for('lists.view', id=self.id),
            entities_api_url=url_for('entities.index', list=self.id),
            label=self.label,
            public=self.public,
            creator_id=self.creator_id,
            created_at=self.created_at,
            updated_at=self.updated_at,
        )

    @classmethod
    def create(cls, data, user):
        """Build a list from ``data`` with ``user`` as creator and add it
        to the session."""
        created = cls()
        created.update(data, user)
        created.creator = user
        db.session.add(created)
        return created

    def update(self, data, user):
        """Apply form data to the list.

        ``data`` is deserialized through ``ListForm``. The label is always
        replaced; ``public`` only when present. The user set is replaced
        by the submitted users plus ``user`` (when given).
        """
        cleaned = ListForm().deserialize(data)
        self.label = cleaned.get('label')
        public = cleaned.get('public')
        if public is not None:
            self.public = public
        members = set(cleaned.get('users', []))
        if user is not None:
            members.add(user)
        self.users = list(members)

    def delete(self):
        """Delete the list row; this method does not delete the list's
        entities itself."""
        db.session.delete(self)

    @classmethod
    def by_label(cls, label):
        """Return the first list with the given label, or ``None``."""
        return db.session.query(cls).filter_by(label=label).first()

    @classmethod
    def by_id(cls, id):
        """Return the list with the given ID, or ``None``."""
        return db.session.query(cls).filter_by(id=id).first()

    @classmethod
    def user_list_ids(cls, user=None, include_public=True):
        """Return the IDs of the lists visible to ``user``.

        Public lists count when ``include_public`` is true; lists the user
        belongs to count when the user is authenticated. With neither
        condition active the result is empty. An authenticated admin
        receives every list ID, unfiltered.
        """
        logged_in = user is not None and user.is_authenticated()
        id_query = db.session.query(cls.id)
        visible = []
        if include_public:
            visible.append(cls.public == True)  # noqa
        if logged_in:
            visible.append(cls.users.any(User.id == user.id))
        if not visible:
            return []
        if not logged_in or not user.is_admin:
            id_query = id_query.filter(or_(*visible))
        return [row.id for row in id_query.all()]

    @classmethod
    def all_by_user(cls, user):
        """Query the lists visible to ``user``, highest ID first."""
        visible_ids = cls.user_list_ids(user)
        lists = db.session.query(cls).filter(cls.id.in_(visible_ids))
        return lists.order_by(cls.id.desc())

    @property
    def terms(self):
        """Set of distinct normalized selectors of this list's entities."""
        from aleph.model.entity import Entity
        from aleph.model.selector import Selector
        selectors = db.session.query(Selector.normalized)
        selectors = selectors.join(Entity, Entity.id == Selector.entity_id)
        selectors = selectors.filter(Entity.list_id == self.id)
        return {row[0] for row in selectors.distinct()}

    def __repr__(self):
        fields = (self.id, self.label)
        return '<List(%r, %r)>' % fields

    def __unicode__(self):
        return self.label
Пример #9
0
class Mapping(db.Model, DatedModel):
    """A mapping to load entities from a table"""

    __tablename__ = "mapping"

    FAILED = "failed"
    SUCCESS = "success"
    PENDING = "pending"
    # Translatable labels for the values stored in ``last_run_status``.
    STATUS = {
        SUCCESS: lazy_gettext("success"),
        FAILED: lazy_gettext("failed"),
        PENDING: lazy_gettext("pending"),
    }

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column("query", JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"), index=True)
    role = db.relationship(Role,
                           backref=db.backref("mappings",
                                              lazy="dynamic"))  # noqa

    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True)
    collection = db.relationship(Collection,
                                 backref=db.backref("mappings",
                                                    lazy="dynamic"))

    table_id = db.Column(db.String(ENTITY_ID_LEN), index=True)

    disabled = db.Column(db.Boolean, nullable=True)
    last_run_status = db.Column(db.Unicode, nullable=True)
    last_run_err_msg = db.Column(db.Unicode, nullable=True)

    def get_proxy_context(self):
        """Metadata to be added to each generated entity."""
        return {
            "created_at": iso_text(self.created_at),
            "updated_at": iso_text(self.updated_at),
            "role_id": self.role_id,
            "mutable": True,
        }

    def update(self, query=None, table_id=None):
        """Refresh ``updated_at`` and optionally replace query/table.

        :param query: New mapping query; ignored when empty or ``None``.
        :param table_id: New table entity ID; ignored when empty or
            ``None``.
        """
        self.updated_at = datetime.utcnow()
        if query:
            self.query = query
        if table_id:
            self.table_id = table_id
        db.session.add(self)

    def set_status(self, status, error=None):
        """Record the outcome of the last run and its error message."""
        self.last_run_status = status
        self.last_run_err_msg = error
        db.session.add(self)

    def to_dict(self):
        """Serialize the mapping into a plain dictionary.

        ``last_run_status`` is the label from ``STATUS`` (``None`` for an
        unknown or unset status). ``query`` is always a dict.
        """
        data = self.to_dict_dates()
        status = self.STATUS.get(self.last_run_status)
        data.update({
            "id": stringify(self.id),
            # The column is nullable; ``dict(None)`` would raise TypeError.
            "query": dict(self.query or {}),
            "role_id": stringify(self.role_id),
            "collection_id": stringify(self.collection_id),
            "table_id": self.table_id,
            "last_run_status": status,
            "last_run_err_msg": self.last_run_err_msg,
        })
        return data

    @classmethod
    def by_collection(cls, collection_id, table_id=None):
        """Query the mappings of a collection, optionally for one table."""
        q = cls.all().filter(cls.collection_id == collection_id)
        if table_id is not None:
            q = q.filter(cls.table_id == table_id)
        return q

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete every mapping of the given collection."""
        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq.delete(synchronize_session=False)

    @classmethod
    def delete_by_table(cls, entity_id):
        """Bulk-delete every mapping whose table is ``entity_id``."""
        pq = db.session.query(cls)
        pq = pq.filter(cls.table_id == entity_id)
        pq.delete(synchronize_session=False)

    @classmethod
    def create(cls, query, table_id, collection, role_id):
        """Create a mapping and add it to the session.

        :param query: Mapping query to store.
        :param table_id: ID of the table entity the mapping reads from.
        :param collection: Collection object; its ``id`` is stored.
        :param role_id: ID of the role that owns the mapping.
        """
        mapping = cls()
        mapping.role_id = role_id
        mapping.query = query
        mapping.collection_id = collection.id
        mapping.table_id = table_id
        mapping.update()
        return mapping

    def __repr__(self):
        return "<Mapping(%r, %r)>" % (self.id, self.table_id)
Пример #10
0
class Document(db.Model, DatedModel):
    """A document row with a JSONB metadata blob and collection links.

    ``content_hash`` and ``foreign_id`` live in their own columns and are
    copied into the metadata blob every time ``meta`` is read.
    """
    _schema = 'document.json#'

    TYPE_TEXT = 'text'
    TYPE_TABULAR = 'tabular'
    TYPE_OTHER = 'other'

    id = db.Column(db.BigInteger, primary_key=True)
    content_hash = db.Column(db.Unicode(65), nullable=False, index=True)
    foreign_id = db.Column(db.Unicode, unique=False, nullable=True)
    type = db.Column(db.Unicode(10), nullable=False, index=True)
    _meta = db.Column('meta', JSONB)

    collections = db.relationship(
        Collection,
        secondary=collection_document_table,  # noqa
        backref=db.backref('documents', lazy='dynamic'))  # noqa
    source_collection_id = db.Column(db.Integer,
                                     db.ForeignKey('collection.id'),
                                     nullable=True)  # noqa
    source_collection = db.relationship(Collection)

    @property
    def title(self):
        """Title taken from the document metadata."""
        return self.meta.title

    @hybrid_property
    def meta(self):
        """Return the metadata wrapped in a ``Metadata`` object.

        The stored blob is initialised to an empty dict when missing, and
        the ``content_hash`` / ``foreign_id`` column values are written into
        it before wrapping.
        """
        self._meta = self._meta or {}
        self._meta['content_hash'] = self.content_hash
        self._meta['foreign_id'] = self.foreign_id
        return Metadata.from_data(self._meta or {})

    @meta.setter
    def meta(self, meta):
        """Store new metadata.

        A ``Metadata`` instance also updates the ``content_hash`` and
        ``foreign_id`` columns and is converted with ``to_attr_dict()``;
        any other value is stored as given.
        """
        if isinstance(meta, Metadata):
            self.content_hash = meta.content_hash
            self.foreign_id = meta.foreign_id
            meta = meta.to_attr_dict()
        self._meta = meta
        # In-place changes to JSON values are not tracked automatically.
        flag_modified(self, '_meta')

    def update(self, data, writeable):
        """Validate ``data`` and apply it to collections and metadata.

        Note that ``collection_id`` is popped from ``data``, i.e. the
        caller's dict is modified. ``writeable`` is the container of
        collection ids that may be changed (see ``update_collections``).
        """
        validate(data, self._schema)
        collection_id = data.pop('collection_id', [])
        self.update_collections(collection_id, writeable)
        meta = self.meta
        meta.update(data, safe=True)
        self.meta = meta
        db.session.add(self)

    def update_collections(self, collection_id, writeable):
        """Synchronise collection membership with ``collection_id``.

        Only collections whose id is in ``writeable`` are added or removed,
        and the source collection is never removed.
        """
        # Iterate over a snapshot: removing from ``self.collections`` while
        # looping over it directly skips the element after each removal.
        for coll in list(self.collections):
            if coll.id == self.source_collection_id:
                continue
            if coll.id not in collection_id and coll.id in writeable:
                self.collections.remove(coll)
        for coll_id in collection_id:
            if coll_id in writeable:
                coll = Collection.by_id(coll_id)
                # Skip ids that do not resolve to a collection rather than
                # appending ``None`` to the relationship.
                if coll is not None and coll not in self.collections:
                    self.collections.append(coll)
        db.session.add(self)

    def delete_pages(self):
        """Bulk-delete this document's pages, then refresh the instance."""
        pq = db.session.query(DocumentPage)
        pq = pq.filter(DocumentPage.document_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_records(self):
        """Bulk-delete this document's records, then refresh the instance."""
        pq = db.session.query(DocumentRecord)
        pq = pq.filter(DocumentRecord.document_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_references(self, origin=None):
        """Bulk-delete references to this document.

        When ``origin`` is given, only references with that origin are
        removed. The instance is refreshed afterwards.
        """
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.document_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete the document and its references, records and pages.

        ``deleted_at`` is accepted but not used by this method.
        """
        self.delete_references()
        self.delete_records()
        self.delete_pages()
        db.session.delete(self)

    def insert_records(self, sheet, iterable, chunk_size=1000):
        """Bulk-insert ``DocumentRecord`` rows in chunks.

        Each item of ``iterable`` becomes one row; ``row_id`` is its
        zero-based position. Rows are inserted ``chunk_size`` at a time,
        with a final insert for any remainder.
        """
        chunk = []
        for i, data in enumerate(iterable):
            chunk.append({
                'document_id': self.id,
                'row_id': i,
                'sheet': sheet,
                'data': data
            })
            if len(chunk) >= chunk_size:
                db.session.bulk_insert_mappings(DocumentRecord, chunk)
                chunk = []

        if chunk:
            db.session.bulk_insert_mappings(DocumentRecord, chunk)

    def text_parts(self):
        """Yield text parts from pages (text) or records (tabular).

        Documents of any other type yield nothing.
        """
        if self.type == self.TYPE_TEXT:
            for page in self.pages:
                for text in page.text_parts():
                    yield text
        elif self.type == self.TYPE_TABULAR:
            for record in self.records:
                for text in record.text_parts():
                    yield text

    @classmethod
    def get_max_id(cls):
        """Return the largest document id (``None`` if the query is empty)."""
        q = db.session.query(func.max(cls.id))
        return q.scalar()

    def __repr__(self):
        return '<Document(%r,%r,%r)>' % (self.id, self.type, self.meta.title)

    @property
    def collection_ids(self):
        """Ids of all linked collections plus the source collection id."""
        collection_ids = [c.id for c in self.collections]
        if self.source_collection_id not in collection_ids:
            if self.source_collection_id is not None:
                collection_ids.append(self.source_collection_id)
        return collection_ids

    def _add_to_dict(self, data):
        """Add the column-backed fields to ``data`` and return it."""
        collection_ids = self.collection_ids
        try:
            from aleph.authz import collections_public
            data['public'] = collections_public(collection_ids)
        except Exception:
            # Best effort: the 'public' flag is omitted when it cannot be
            # computed. Unlike the previous bare ``except``, this no longer
            # swallows KeyboardInterrupt or SystemExit.
            pass
        data.update({
            'id': self.id,
            'type': self.type,
            'source_collection_id': self.source_collection_id,
            'collection_id': collection_ids,
            'created_at': self.created_at,
            'updated_at': self.updated_at
        })
        return data

    def to_dict(self):
        """Return ``meta.to_dict()`` extended with the column fields."""
        data = self.meta.to_dict()
        return self._add_to_dict(data)

    def to_index_dict(self):
        """Return ``meta.to_index_dict()`` extended with the column fields."""
        data = self.meta.to_index_dict()
        return self._add_to_dict(data)
Пример #11
0
class Entity(db.Model, UuidModel, SoftDeleteModel, SchemaModel):
    """A named entity linked to one or more collections.

    Rows are polymorphic on the ``type`` column, which holds the schema
    URI. Deletion is soft (see ``delete``).
    """
    _schema = '/entity/entity.json#'
    _schema_recurse = True

    STATE_ACTIVE = 'active'
    STATE_PENDING = 'pending'
    STATE_DELETED = 'deleted'

    name = db.Column(db.Unicode)
    type = db.Column('type', db.String(255), index=True)
    state = db.Column(db.String(128), nullable=True, default=STATE_ACTIVE)
    summary = db.Column(db.Unicode, nullable=True)
    description = db.Column(db.Unicode, nullable=True)
    jurisdiction_code = db.Column(db.Unicode, nullable=True)
    register_name = db.Column(db.Unicode, nullable=True)
    register_url = db.Column(db.Unicode, nullable=True)

    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': _schema}

    collections = db.relationship(
        Collection,
        secondary=collection_entity_table,  # noqa
        backref=db.backref('entities', lazy='dynamic'))  # noqa

    def delete_references(self, origin=None):
        """Bulk-delete references to this entity.

        When ``origin`` is given, only references with that origin are
        removed. The instance is refreshed afterwards.
        """
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.entity_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Soft-delete the entity together with its references and alerts.

        ``deleted_at`` defaults to the current UTC time and is passed on to
        every alert and to the parent class ``delete``.
        """
        self.delete_references()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        self.state = self.STATE_DELETED
        super(Entity, self).delete(deleted_at=deleted_at)

    def update(self, data, merge=False):
        """Apply ``data`` to the entity via ``schema_update``."""
        self.schema_update(data, merge=merge)

    def merge(self, other):
        """Merge ``other`` into this entity.

        Adds the other entity's collections, records its name as an alias
        when it differs, re-points alerts and references, commits, and then
        merges schema properties (which also deletes ``other``). Merging an
        entity with itself is a no-op.
        """
        if self.id == other.id:
            return

        # De-dupe todo:
        # 1. merge identifiers
        # 2. merge properties
        # 3. merge names, make merged names into a.k.a's
        # 4. merge collections
        # 5. update references
        # 6. update alerts
        # 7. delete source entities
        # 8. update source entities
        # 9. update target entity

        collections = list(self.collections)
        for collection in other.collections:
            if collection not in collections:
                self.collections.append(collection)

        # ``name`` is nullable, so compare without calling ``.lower()`` on
        # ``None``. An alias is only recorded when the other entity actually
        # has a name and it differs (case-insensitively) from ours.
        own_name = (self.name or '').lower()
        other_name = (other.name or '').lower()
        if other.name is not None and own_name != other_name:
            aka = EntityOtherName()
            aka.update({'name': other.name})
            aka.entity = self
            db.session.add(aka)

        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({'entity_id': self.id})

        from aleph.model.reference import Reference
        q = db.session.query(Reference).filter(Reference.entity_id == other.id)
        q.update({'entity_id': self.id})
        db.session.commit()

        db.session.refresh(other)
        self.schema_merge(other)

    def schema_merge(self, other):
        """Attempt to merge other onto self via JSON schema.

        Values already set on ``self`` win. Afterwards ``created_at`` is the
        earlier of the two, ``updated_at`` is now, ``other`` is deleted and
        the session is flushed.
        """
        # TODO: figure out if we want to change schema
        for prop in self.schema_visitor.properties:
            if prop.name == 'id':
                continue

            self_value = getattr(self, prop.name) if \
                hasattr(self, prop.name) else None
            other_value = getattr(other, prop.name) if \
                hasattr(other, prop.name) else None

            if self_value is None and other_value is None:
                continue

            if prop.is_value and self_value is None:
                # update local properties
                setattr(self, prop.name, other_value)

            elif prop.is_object and self._schema_recurse:
                # update associated objects which are not set on the
                # existing object.
                rel = self._get_relationship(prop.name, 'MANYTOONE')
                if self_value is not None or other_value is None:
                    continue
                data = other_value.to_dict()
                obj = type(other_value)()
                obj.update(data)
                for local, remote in self._get_associations(obj, rel):
                    other_id = getattr(obj, remote)
                    setattr(self, local, other_id)

            elif prop.is_array and self._schema_recurse \
                    and other_value is not None:
                # merge array associations
                rel = self._get_relationship(prop.name, 'ONETOMANY')
                # ``self_value`` is None when the attribute is missing on
                # self; treat that as an empty list instead of failing.
                full_list = list(self_value or [])

                for new_item in other_value:
                    data = new_item.to_dict()
                    existing = [o for o in full_list if o.merge_compare(data)]
                    if len(existing):
                        continue

                    obj = type(new_item)()
                    obj.update(data)
                    for local, remote in self._get_associations(obj, rel):
                        setattr(obj, remote, getattr(self, local))
                    db.session.add(obj)
                    full_list.append(obj)

        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()
        other.delete()
        db.session.flush()

    @classmethod
    def save(cls, data, collections, merge=False):
        """Create or update an entity from ``data`` and return it.

        The entity is looked up by ``data['id']``, then by each identifier
        within ``collections``; if none is found a new one is created with
        the schema class named by ``data['$schema']``. ``data`` gets a
        default ``state``, and with ``merge=True`` the entity's existing
        collections are appended to the ``collections`` list passed in.

        Raises:
            AttributeError: if no collection ends up specified.
        """
        ent = cls.by_id(data.get('id'))
        if 'state' not in data:
            data['state'] = cls.STATE_ACTIVE

        for identifier in data.get('identifiers', []):
            if ent is None:
                ent = cls.by_identifier(identifier.get('scheme'),
                                        identifier.get('identifier'),
                                        collections=collections)
        if ent is None:
            schema = data.get('$schema', cls._schema)
            cls = cls.get_schema_class(schema)
            ent = cls()
            ent.id = make_textid()

        if merge:
            # Track known ids in a set instead of rebuilding the id list on
            # every iteration.
            known_ids = set(c.id for c in collections)
            for collection in ent.collections:
                if collection.id not in known_ids:
                    collections.append(collection)
                    known_ids.add(collection.id)
        if not len(collections):
            raise AttributeError("No collection specified.")

        ent.collections = collections
        ent.update(data, merge=merge)
        return ent

    @classmethod
    def filter_collections(cls, q, collections=None):
        """Restrict ``q`` to entities in the given, non-deleted collections.

        ``collections`` may hold ``Collection`` objects or raw ids. With
        ``None`` the query is returned unchanged.
        """
        if collections is None:
            return q
        collection_ids = []
        for collection in collections:
            if isinstance(collection, Collection):
                collection = collection.id
            collection_ids.append(collection)
        coll = aliased(Collection)
        q = q.join(coll, Entity.collections)
        q = q.filter(coll.id.in_(collection_ids))
        q = q.filter(coll.deleted_at == None)  # noqa
        return q

    @classmethod
    def by_identifier(cls, scheme, identifier, collections=None):
        """Return the first live entity with a matching live identifier.

        Returns ``None`` when nothing matches. ``collections`` optionally
        restricts the search (see ``filter_collections``).
        """
        q = db.session.query(Entity)
        q = q.filter(Entity.deleted_at == None)  # noqa
        q = cls.filter_collections(q, collections=collections)
        ident = aliased(EntityIdentifier)
        q = q.join(ident, Entity.identifiers)
        q = q.filter(ident.deleted_at == None)  # noqa
        q = q.filter(ident.scheme == scheme)
        q = q.filter(ident.identifier == identifier)
        return q.first()

    @classmethod
    def by_id_set(cls, ids, collections=None):
        """Return a dict mapping id to entity for the given ``ids``.

        An empty ``ids`` returns an empty dict without querying.
        """
        if not len(ids):
            return {}
        q = cls.all()
        q = cls.filter_collections(q, collections=collections)
        q = q.options(joinedload('collections'))
        q = q.filter(cls.id.in_(ids))
        entities = {}
        for ent in q:
            entities[ent.id] = ent
        return entities

    @classmethod
    def latest(cls):
        """Return the newest ``updated_at`` among active entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        return q.scalar()

    @classmethod
    def all_by_document(cls, document_id):
        """Return a distinct query of active entities referenced by a
        document."""
        from aleph.model.reference import Reference
        q = cls.all()
        q = q.options(joinedload('collections'))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        q = q.join(Reference)
        q = q.filter(Reference.document_id == document_id)
        return q.distinct()

    @property
    def fingerprint(self):
        """Fingerprint of the entity name, via ``make_fingerprint``."""
        return make_fingerprint(self.name)

    @property
    def terms(self):
        """The entity name plus the terms of all other names.

        ``None`` and empty values are dropped.
        """
        terms = {self.name}
        for other_name in self.other_names:
            terms.update(other_name.terms)
        return [t for t in terms if t is not None and len(t)]

    @property
    def regex_terms(self):
        """Set of normalised terms that contain no other term."""
        # This is to find the shortest possible regex for each entity.
        # If, for example, an entity matches both "Al Qaeda" and
        # "Al Qaeda in Iraq, Syria and the Levant", it is useless to
        # search for the latter.
        terms = [' %s ' % normalize_strong(t) for t in self.terms]
        regex_terms = set()
        for term in terms:
            if len(term) < 4 or len(term) > 120:
                continue
            # Terms are space-padded, so containment only matches on
            # whole-word boundaries.
            contained = any(other != term and other in term
                            for other in terms)
            if not contained:
                regex_terms.add(term.strip())
        return regex_terms

    def __repr__(self):
        return '<Entity(%r, %r)>' % (self.id, self.name)

    def __unicode__(self):
        return self.name

    def to_dict(self):
        """Parent ``to_dict()`` plus the ids of the linked collections."""
        data = super(Entity, self).to_dict()
        data['collection_id'] = [c.id for c in self.collections]
        return data

    def to_ref(self):
        """Compact reference: id, name, schema and collection ids."""
        return {
            'id': self.id,
            'name': self.name,
            '$schema': self.type,
            'collection_id': [c.id for c in self.collections]
        }
Пример #12
0
class User(db.Model):
    """A user account.

    Besides the application's own columns, this carries the columns that
    the inline comments mark as required by flask-user.
    """
    id = db.Column(db.Integer, primary_key=True)
    email = db.Column(
        db.Unicode,
        # the following attributes are for flask-user
        nullable=False,
        unique=True)
    display_name = db.Column(db.Unicode, nullable=True)
    active = db.Column(db.Boolean, nullable=False, default=True)

    # Aleph-specific columns
    is_admin = db.Column(db.Boolean, nullable=False, default=False)

    twitter_id = db.Column(db.Unicode)
    facebook_id = db.Column(db.Unicode)

    api_key = db.Column(db.Unicode, default=make_token)

    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime,
                           default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    # Columns required for flask-user

    confirmed_at = db.Column(db.DateTime())
    password = db.Column(db.String(255), nullable=False, server_default='')
    reset_password_token = db.Column(db.String(100),
                                     nullable=False,
                                     server_default='')
    # 'active' already defined above
    # omitting first and last name

    # Relationships
    roles = db.relationship('Role',
                            secondary='roles_users',
                            backref=db.backref('user', lazy='dynamic'))

    def is_active(self):
        """Whether the account's ``active`` flag is set."""
        return self.active

    def is_authenticated(self):
        """Always ``True``."""
        return True

    def is_anonymous(self):
        """Always ``False``."""
        return False

    def get_id(self):
        """The primary key converted with ``unicode``."""
        return unicode(self.id)

    def __repr__(self):
        return '<User(%r,%r)>' % (self.id, self.email)

    def __unicode__(self):
        return self.display_name

    def to_dict(self):
        """Public fields of the user plus the URL of its API view."""
        return dict(
            id=self.id,
            api_url=url_for('users.view', id=self.id),
            email=self.email,
            display_name=self.display_name,
        )

    def update(self, data):
        """Deserialize ``data`` with ``UserForm`` and copy the display name
        and email onto the user."""
        cleaned = UserForm().deserialize(data)
        self.display_name = cleaned.get('display_name')
        self.email = cleaned.get('email')

    @classmethod
    def load(cls, data):
        """Find or create a user from a profile dict.

        The lookup uses ``twitter_id`` when that key is present, otherwise
        ``facebook_id``. Both ids on the user are then overwritten from
        ``data``; display name and email are only filled in when empty.
        The user is added to the session but not committed.
        """
        if 'twitter_id' in data:
            found = cls.by_twitter_id(data.get('twitter_id'))
        elif 'facebook_id' in data:
            found = cls.by_facebook_id(data.get('facebook_id'))
        else:
            found = None
        user = cls() if found is None else found

        user.twitter_id = data.get('twitter_id')
        user.facebook_id = data.get('facebook_id')
        for field in ('display_name', 'email'):
            if not getattr(user, field):
                setattr(user, field, data.get(field))
        db.session.add(user)
        return user

    def check_pw(self, pw):
        """Compare the stored password with ``get_hmac(pw)``."""
        # NOTE(review): plain ``==`` is not a constant-time comparison;
        # consider hmac.compare_digest once operand types are confirmed.
        expected = get_hmac(pw)
        return self.password == expected

    @classmethod
    def all(cls):
        """Query over all users with ``active=True``."""
        return db.session.query(cls).filter_by(active=True)

    @classmethod
    def by_id(cls, id):
        """First user with the given id (coerced with ``int``), or None."""
        return db.session.query(cls).filter_by(id=int(id)).first()

    @classmethod
    def by_api_key(cls, api_key):
        """First user with the given API key, or None."""
        return db.session.query(cls).filter_by(api_key=api_key).first()

    @classmethod
    def by_twitter_id(cls, twitter_id):
        """First user with the given twitter id (as ``str``), or None."""
        wanted = str(twitter_id)
        return db.session.query(cls).filter_by(twitter_id=wanted).first()

    @classmethod
    def by_facebook_id(cls, facebook_id):
        """First user with the given facebook id (as ``str``), or None."""
        wanted = str(facebook_id)
        return db.session.query(cls).filter_by(facebook_id=wanted).first()

    @classmethod
    def by_email(cls, email):
        """First user with the given email, or None."""
        return db.session.query(cls).filter_by(email=email).first()

    @classmethod
    def create_by_email(cls, email, pw):
        """Create a user with ``get_hmac(pw)`` as password and commit."""
        account = cls(email=email, password=get_hmac(pw))
        session = db.session
        session.add(account)
        session.commit()
        return account
Пример #13
0
class Document(db.Model, DatedModel, Metadata):
    """A document belonging to exactly one collection.

    Documents form a tree through ``parent_id``. Free-form metadata is kept
    in the JSONB ``meta`` column and extracted text in ``body_text`` /
    ``body_raw``.
    """
    MAX_TAGS = 10000

    SCHEMA = 'Document'
    SCHEMA_FOLDER = 'Folder'
    SCHEMA_PACKAGE = 'Package'
    SCHEMA_WORKBOOK = 'Workbook'
    SCHEMA_TEXT = 'PlainText'
    SCHEMA_HTML = 'HyperText'
    SCHEMA_PDF = 'Pages'
    SCHEMA_IMAGE = 'Image'
    SCHEMA_AUDIO = 'Audio'
    SCHEMA_VIDEO = 'Video'
    SCHEMA_TABLE = 'Table'
    SCHEMA_EMAIL = 'Email'

    STATUS_PENDING = 'pending'
    STATUS_SUCCESS = 'success'
    STATUS_FAIL = 'fail'

    id = db.Column(db.BigInteger, primary_key=True)
    content_hash = db.Column(db.Unicode(65), nullable=True, index=True)
    foreign_id = db.Column(db.Unicode, unique=False, nullable=True, index=True)
    schema = db.Column(db.String(255), nullable=False)
    status = db.Column(db.Unicode(10), nullable=True)
    # A callable default yields a fresh dict per row instead of one shared
    # mutable ``{}`` object.
    meta = db.Column(JSONB, default=dict)
    error_message = db.Column(db.Unicode(), nullable=True)
    body_text = db.Column(db.Unicode(), nullable=True)
    body_raw = db.Column(db.Unicode(), nullable=True)

    uploader_id = db.Column(db.Integer,
                            db.ForeignKey('role.id'),
                            nullable=True)  # noqa
    parent_id = db.Column(db.BigInteger,
                          db.ForeignKey('document.id'),
                          nullable=True,
                          index=True)  # noqa
    children = db.relationship('Document',
                               lazy='dynamic',
                               backref=db.backref('parent',
                                                  uselist=False,
                                                  remote_side=[id]))  # noqa
    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              nullable=False,
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('documents',
                                                    lazy='dynamic'))  # noqa

    def __init__(self, **kw):
        # Give every new instance its own metadata dict before the base
        # constructor applies keyword arguments.
        self.meta = {}
        super(Document, self).__init__(**kw)

    @property
    def model(self):
        """Schema object looked up from ``model`` by the ``schema`` name."""
        return model.get(self.schema)

    @property
    def name(self):
        """First of title, file name and source URL that is not ``None``.

        Returns ``None`` when all three are unset.
        """
        if self.title is not None:
            return self.title
        if self.file_name is not None:
            return self.file_name
        if self.source_url is not None:
            return self.source_url

    @property
    def supports_records(self):
        # Slightly unintuitive naming: this just checks the document type,
        # not if there actually are any records.
        return self.schema in [self.SCHEMA_PDF, self.SCHEMA_TABLE]

    @property
    def supports_pages(self):
        """Whether the schema is the paged (PDF) schema."""
        return self.schema == self.SCHEMA_PDF

    @property
    def supports_nlp(self):
        """``False`` for container and audio/video schemata, else ``True``."""
        structural = [
            Document.SCHEMA,
            Document.SCHEMA_PACKAGE,
            Document.SCHEMA_FOLDER,
            Document.SCHEMA_WORKBOOK,
            Document.SCHEMA_VIDEO,
            Document.SCHEMA_AUDIO,
        ]
        return self.schema not in structural

    @property
    def ancestors(self):
        """Ids of all ancestors, ending with the direct parent.

        The result is looked up in, and stored to, the cache under a
        per-document key. Documents without a parent return ``[]``.
        """
        if self.parent_id is None:
            return []
        key = cache.key('ancestors', self.id)
        ancestors = cache.get_list(key)
        if ancestors is not None:
            return ancestors
        # Copy before appending so the list the parent returned (and may
        # have handed to the cache) is never mutated.
        ancestors = list(self.parent.ancestors)
        ancestors.append(self.parent_id)
        cache.set_list(key, ancestors)
        return ancestors

    def update(self, data):
        """Set the known metadata properties from ``data``.

        Properties missing from ``data`` keep the value currently stored in
        ``meta`` (``None`` if absent). The instance is added to the session.
        """
        props = ('title', 'summary', 'author', 'crawler', 'source_url',
                 'file_name', 'mime_type', 'headers', 'date', 'authored_at',
                 'modified_at', 'published_at', 'retrieved_at', 'languages',
                 'countries', 'keywords')
        for prop in props:
            value = data.get(prop, self.meta.get(prop))
            setattr(self, prop, value)
        db.session.add(self)

    def update_meta(self):
        """Mark ``meta`` as modified so in-place changes get persisted."""
        flag_modified(self, 'meta')

    def delete_records(self):
        """Bulk-delete this document's records and flush the session."""
        pq = db.session.query(DocumentRecord)
        pq = pq.filter(DocumentRecord.document_id == self.id)
        pq.delete()
        db.session.flush()

    def delete_tags(self):
        """Bulk-delete this document's tags and flush the session."""
        pq = db.session.query(DocumentTag)
        pq = pq.filter(DocumentTag.document_id == self.id)
        pq.delete()
        db.session.flush()

    def delete(self, deleted_at=None):
        """Delete the document with its records and tags.

        ``deleted_at`` is accepted but not used by this method.
        """
        self.delete_records()
        self.delete_tags()
        db.session.delete(self)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Bulk-delete all documents of a collection with records and tags.

        Records and tags are removed first, selected through a subquery of
        the collection's document ids. ``deleted_at`` is not used.
        """
        documents = db.session.query(cls.id)
        documents = documents.filter(cls.collection_id == collection_id)
        documents = documents.subquery()

        pq = db.session.query(DocumentRecord)
        pq = pq.filter(DocumentRecord.document_id.in_(documents))
        pq.delete(synchronize_session=False)

        pq = db.session.query(DocumentTag)
        pq = pq.filter(DocumentTag.document_id.in_(documents))
        pq.delete(synchronize_session=False)

        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq.delete(synchronize_session=False)

    def raw_texts(self):
        """Yield the unfiltered text values of the document.

        Title, file name, source URL, summary and author are always
        yielded (possibly as ``None``). Body text and record texts follow
        only when the status is ``STATUS_SUCCESS``.
        """
        yield self.title
        yield self.file_name
        yield self.source_url
        yield self.summary
        yield self.author

        if self.status != self.STATUS_SUCCESS:
            return

        yield self.body_text
        if self.supports_records:
            # iterate over all the associated records.
            pq = db.session.query(DocumentRecord)
            pq = pq.filter(DocumentRecord.document_id == self.id)
            pq = pq.order_by(DocumentRecord.index.asc())
            for record in pq.yield_per(10000):
                yield from record.raw_texts()

    @property
    def texts(self):
        """``raw_texts()`` passed through ``filter_texts``."""
        yield from filter_texts(self.raw_texts())

    @classmethod
    def by_keys(cls,
                parent_id=None,
                collection_id=None,
                foreign_id=None,
                content_hash=None):
        """Try and find a document by various criteria.

        The lookup uses the collection, the parent (if given) and either
        ``foreign_id`` or, failing that, ``content_hash``. When nothing is
        found a new document is created. The given keys are then set on the
        document, which is added to the session and returned.

        Raises:
            ValueError: if neither ``foreign_id`` nor ``content_hash`` is
                given.
        """
        q = cls.all()
        q = q.filter(Document.collection_id == collection_id)

        if parent_id is not None:
            q = q.filter(Document.parent_id == parent_id)

        if foreign_id is not None:
            q = q.filter(Document.foreign_id == foreign_id)
        elif content_hash is not None:
            q = q.filter(Document.content_hash == content_hash)
        else:
            raise ValueError("No unique criterion for document.")

        document = q.first()
        if document is None:
            document = cls()
            document.schema = cls.SCHEMA
            document.collection_id = collection_id

        if parent_id is not None:
            document.parent_id = parent_id

        if foreign_id is not None:
            document.foreign_id = foreign_id

        if content_hash is not None:
            document.content_hash = content_hash

        db.session.add(document)
        return document

    @classmethod
    def by_id(cls, id, collection_id=None):
        """Return the document with ``id``, or ``None``.

        ``collection_id`` optionally restricts the lookup to a collection.
        """
        if id is None:
            return
        q = cls.all()
        q = q.filter(cls.id == id)
        if collection_id is not None:
            q = q.filter(cls.collection_id == collection_id)
        return q.first()

    @classmethod
    def by_collection(cls, collection_id=None):
        """Return a query for the documents of ``collection_id``."""
        q = cls.all()
        q = q.filter(cls.collection_id == collection_id)
        return q

    @classmethod
    def find_ids(cls, collection_id=None, failed_only=False):
        """Return an id query ordered by ascending id.

        Optionally restricted to one collection and/or to documents whose
        status is not ``STATUS_SUCCESS``.
        """
        q = cls.all_ids()
        if collection_id is not None:
            q = q.filter(cls.collection_id == collection_id)
        if failed_only:
            q = q.filter(cls.status != cls.STATUS_SUCCESS)
        q = q.order_by(cls.id.asc())
        return q

    def to_proxy(self):
        """Build an entity proxy from the document's columns, metadata
        and tags."""
        meta = dict(self.meta)
        # ``headers`` can be stored as None (``update`` assigns None for
        # absent props); fall back to an empty dict so ``.items()`` works.
        headers = meta.pop('headers', None) or {}
        headers = {slugify(k, sep='_'): v for k, v in headers.items()}
        proxy = model.get_proxy({
            'id': str(self.id),
            'schema': self.model,
            'properties': meta
        })
        proxy.set('contentHash', self.content_hash)
        proxy.set('parent', self.parent_id)
        proxy.set('ancestors', self.ancestors)
        proxy.set('fileSize', meta.get('file_size'))
        proxy.set('fileName', meta.get('file_name'))
        if not proxy.has('fileName'):
            # Fall back to the file name in the Content-Disposition header.
            disposition = headers.get('content_disposition')
            if disposition is not None:
                # NOTE(review): the ``cgi`` module was removed in Python
                # 3.13 (PEP 594); this call needs replacing before then.
                _, attrs = cgi.parse_header(disposition)
                proxy.set('fileName', attrs.get('filename'))
        proxy.set('mimeType', meta.get('mime_type'))
        if not proxy.has('mimeType'):
            proxy.set('mimeType', headers.get('content_type'))
        proxy.set('language', meta.get('languages'))
        proxy.set('country', meta.get('countries'))
        proxy.set('authoredAt', meta.get('authored_at'))
        proxy.set('modifiedAt', meta.get('modified_at'))
        proxy.set('publishedAt', meta.get('published_at'))
        proxy.set('retrievedAt', meta.get('retrieved_at'))
        proxy.set('sourceUrl', meta.get('source_url'))
        proxy.set('messageId', meta.get('message_id'), quiet=True)
        proxy.set('inReplyTo', meta.get('in_reply_to'), quiet=True)
        proxy.set('bodyText', self.body_text, quiet=True)
        proxy.set('bodyHtml', self.body_raw, quiet=True)
        columns = meta.get('columns')
        proxy.set('columns', registry.json.pack(columns), quiet=True)
        proxy.set('headers', registry.json.pack(headers), quiet=True)

        pdf = 'application/pdf'
        if meta.get('extension') == 'pdf' or proxy.first('mimeType') == pdf:
            proxy.set('pdfHash', self.content_hash, quiet=True)
        proxy.add('pdfHash', meta.get('pdf_version'), quiet=True)

        # Add up to MAX_TAGS tags, highest weight first, for tag types that
        # have a property mapping.
        q = db.session.query(DocumentTag)
        q = q.filter(DocumentTag.document_id == self.id)
        q = q.filter(DocumentTag.type.in_(DocumentTag.MAPPING.keys()))
        q = q.order_by(DocumentTag.weight.desc())
        q = q.limit(Document.MAX_TAGS)
        for tag in q.all():
            prop = DocumentTag.MAPPING.get(tag.type)
            if prop is not None:
                proxy.add(prop, tag.text)
        return proxy

    def to_dict(self):
        """Return the proxy's full dict plus dates and document columns."""
        proxy = self.to_proxy()
        data = proxy.to_full_dict()
        data.update(self.to_dict_dates())
        data.update({
            'name': self.name,
            'status': self.status,
            'foreign_id': self.foreign_id,
            'document_id': self.id,
            'collection_id': self.collection_id,
            'error_message': self.error_message,
            'uploader_id': self.uploader_id,
            'bulk': False,
        })
        return data

    def __repr__(self):
        return '<Document(%r,%r,%r)>' % (self.id, self.schema, self.title)
Пример #14
0
class Export(db.Model, IdModel, DatedModel):
    """A data export run in the background. The data is stored in a cloud
    storage bucket and the user is given a link to download the data. The link
    expires after a fixed duration and the exported data is deleted. """

    MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024  # 10 GB
    STATUS_PENDING = "pending"
    STATUS_SUCCESSFUL = "successful"
    STATUS_FAILED = "failed"
    # Every valid status value, mapped to its translatable label.
    EXPORT_STATUS = {
        STATUS_PENDING: lazy_gettext("pending"),
        STATUS_SUCCESSFUL: lazy_gettext("successful"),
        STATUS_FAILED: lazy_gettext("failed"),
    }
    DEFAULT_STATUS = STATUS_PENDING
    DEFAULT_EXPIRATION = timedelta(days=30)  # After 30 days

    label = db.Column(db.Unicode)

    operation = db.Column(db.Unicode)

    creator_id = db.Column(db.Integer, db.ForeignKey("role.id"))
    creator = db.relationship(Role,
                              backref=db.backref("exports", lazy="dynamic"))
    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True,
                              nullable=True)
    collection = db.relationship(Collection,
                                 backref=db.backref("exports", lazy="dynamic"))

    expires_at = db.Column(db.DateTime, default=None, nullable=True)
    deleted = db.Column(db.Boolean, default=False)
    export_status = db.Column(db.Unicode, default=DEFAULT_STATUS)

    content_hash = db.Column(db.Unicode(65), index=True, nullable=True)
    file_size = db.Column(db.BigInteger, nullable=True)  # In bytes
    file_name = db.Column(db.Unicode, nullable=True)
    mime_type = db.Column(db.Unicode)
    # A callable default gives every row its own dict instead of sharing one
    # mutable ``{}`` object between all instances.
    meta = db.Column(JSONB, default=dict)

    def to_dict(self):
        """Return the export as a plain dict.

        Starts from ``to_dict_dates()`` and adds the export's own columns.
        ``export_status`` holds the raw stored status value, not the
        translated label from ``EXPORT_STATUS``.
        """
        data = self.to_dict_dates()
        data.update({
            "id": stringify(self.id),
            "label": self.label,
            "operation": self.operation,
            "creator_id": stringify(self.creator_id),
            "collection_id": self.collection_id,
            "expires_at": self.expires_at,
            "deleted": self.deleted,
            "export_status": self.export_status,
            "content_hash": self.content_hash,
            "file_size": self.file_size,
            "file_name": self.file_name,
            "meta": self.meta,
        })
        return data

    @classmethod
    def create(
        cls,
        operation,
        role_id,
        label,
        file_path=None,
        expires_after=None,
        collection=None,
        mime_type=None,
    ):
        """Build a new export and add it to the session (no commit).

        :param operation: stored as-is in the ``operation`` column.
        :param role_id: id of the role that owns the export.
        :param label: stored as-is in the ``label`` column.
        :param file_path: optional local file; when given, its name, size and
            checksum are recorded via ``set_filepath``.
        :param expires_after: ``timedelta`` added to the current UTC time to
            compute ``expires_at``; any falsy value selects
            ``DEFAULT_EXPIRATION``.
        :param collection: optional collection whose id is recorded.
        :param mime_type: stored as-is in the ``mime_type`` column.
        :returns: the new, not yet flushed, ``Export``.
        """
        export = cls()
        export.creator_id = role_id
        export.operation = operation
        export.label = label
        if file_path is not None:
            export.set_filepath(file_path)
        if collection is not None:
            export.collection_id = collection.id
        export.mime_type = mime_type
        lifetime = expires_after or cls.DEFAULT_EXPIRATION
        export.expires_at = datetime.utcnow() + lifetime
        db.session.add(export)
        return export

    @property
    def namespace(self):
        """Key built from the creator's role id, used as archive namespace."""
        return make_key("role", self.creator_id)

    def publish(self):
        """Rename the local file to its content hash and publish it.

        The status is set to successful when ``archive.publish`` returns and
        to failed when it raises; the exception is then re-raised.

        :raises RuntimeError: if no file path was set via ``set_filepath``.
        """
        # ``_file_path`` only exists after set_filepath() has been called, so
        # look it up defensively instead of risking an AttributeError.
        file_path = getattr(self, "_file_path", None)
        if not file_path:
            raise RuntimeError(
                "file path not present for export: %r" % (self,))
        # Use the content hash as file name to ensure uniqueness
        path = Path(file_path.parent, self.content_hash)
        file_path.rename(path)
        try:
            archive.publish(self.namespace, path, self.mime_type)
            self.set_status(status=Export.STATUS_SUCCESSFUL)
        except Exception:
            self.set_status(status=Export.STATUS_FAILED)
            raise

    def set_filepath(self, file_path):
        """Record name, size and checksum of the local export file.

        The path itself is kept on the instance as ``_file_path``; it is not
        a mapped column.
        """
        file_path = ensure_path(file_path)
        file_name = safe_filename(file_path)
        file_size = file_path.stat().st_size
        self.file_name = file_name
        self.file_size = file_size
        self._file_path = file_path
        self.content_hash = checksum(file_path)

    def set_status(self, status):
        """Store ``status`` if it is a key of ``EXPORT_STATUS``.

        Unknown values are ignored without raising.
        """
        if status in self.EXPORT_STATUS:
            self.export_status = status
            db.session.add(self)

    def delete_publication(self):
        """Flag the export as deleted, removing the archived file when no
        other live export still refers to the same content hash."""
        if self._should_delete_publication():
            archive.delete_publication(self.namespace, self.content_hash)
        self.deleted = True
        db.session.add(self)

    def _should_delete_publication(self):
        """Check whether the published export should be deleted from the archive

        Since we store exports by contenthash, there may be other non-expired exports
        that point to the same file in the archive"""
        q = Export.all()
        q = q.filter(Export.content_hash == self.content_hash)
        q = q.filter(Export.deleted.isnot(True))
        q = q.filter(Export.id != self.id)
        return q.first() is None

    @classmethod
    def get_expired(cls, deleted=False):
        """Query exports whose ``expires_at`` is set and not in the future.

        :param deleted: value the ``deleted`` flag must equal; pass ``None``
            to skip that filter.
        :returns: a query object (not yet executed).
        """
        now = datetime.utcnow()
        q = cls.all()
        q = q.filter(cls.expires_at.isnot(None))
        q = q.filter(cls.expires_at <= now)
        if deleted is not None:
            q = q.filter(cls.deleted == deleted)
        return q

    @classmethod
    def by_id(cls, id, role_id=None, deleted=False):
        """Return the export with the given id, or ``None``.

        :param role_id: if given, the export must belong to this role.
        :param deleted: when falsy, exports flagged as deleted are excluded.
        """
        q = cls.all().filter_by(id=id)
        if role_id is not None:
            q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        return q.first()

    @classmethod
    def by_role_id(cls, role_id, deleted=False):
        """Query the exports of a role, newest first.

        :param deleted: when falsy, exports flagged as deleted are excluded.
        :returns: a query object (not yet executed).
        """
        q = cls.all()
        q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        q = q.order_by(cls.created_at.desc())
        return q

    def __repr__(self):
        return "<Export(%r, %r)>" % (self.id, self.creator_id)
Пример #15
0
class Role(db.Model, IdModel, SoftDeleteModel):
    """A user, group or other access control subject."""

    _schema = 'role.json#'
    __tablename__ = 'role'

    # NOTE(review): the '******' values below look like they were masked by
    # a scrubber (presumably 'user') - confirm against the upstream source.
    USER = '******'
    GROUP = 'group'
    SYSTEM = 'system'
    TYPES = [USER, GROUP, SYSTEM]

    SYSTEM_GUEST = 'guest'
    SYSTEM_USER = '******'

    foreign_id = db.Column(db.Unicode(2048), nullable=False, unique=True)
    name = db.Column(db.Unicode, nullable=False)
    email = db.Column(db.Unicode, nullable=True)
    api_key = db.Column(db.Unicode, nullable=True)
    is_admin = db.Column(db.Boolean, nullable=False, default=False)
    type = db.Column(db.Enum(*TYPES, name='role_type'), nullable=False)
    permissions = db.relationship("Permission", backref="role")

    def update(self, data):
        """Validate ``data`` against the role schema and copy ``name`` and
        ``email`` from it; keys that are absent leave the value unchanged."""
        validate(data, self._schema)
        self.name = data.get('name', self.name)
        self.email = data.get('email', self.email)

    def clear_roles(self):
        """Reset ``self.roles`` to an empty list and add self to the session."""
        self.roles = []
        db.session.add(self)

    def add_role(self, role):
        """Append ``role`` to ``self.roles`` and add both to the session."""
        self.roles.append(role)
        db.session.add(role)
        db.session.add(self)

    @classmethod
    def notifiable(cls):
        """Restrict ``all_ids()`` to roles that have an email address."""
        return cls.all_ids().filter(cls.email.isnot(None))

    @classmethod
    def by_foreign_id(cls, foreign_id):
        """Return the role with this foreign id, or ``None`` (also when
        ``foreign_id`` itself is ``None``)."""
        if foreign_id is None:
            return None
        return cls.all().filter_by(foreign_id=foreign_id).first()

    @classmethod
    def by_api_key(cls, api_key):
        """Return the role with this API key, or ``None`` (also when
        ``api_key`` itself is ``None``)."""
        if api_key is None:
            return None
        return cls.all().filter_by(api_key=api_key).first()

    @classmethod
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Fetch the role with ``foreign_id``, creating it if necessary.

        ``type`` and ``name`` are only applied to a newly created role. The
        email is always overwritten with ``email`` (including ``None``), a
        missing API key is generated, and the session is flushed so that the
        returned role has an id.

        :param is_admin: when not ``None``, replaces the admin flag.
        :returns: the loaded or newly created role.
        """
        role = cls.by_foreign_id(foreign_id)
        if role is None:
            role = cls()
            role.foreign_id = foreign_id
            role.name = name
            role.type = type
            role.is_admin = False

        if role.api_key is None:
            role.api_key = uuid4().hex

        role.email = email
        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/pudo/aleph/issues/111
        configured = get_config('AUTHZ_ADMINS') or ''
        auto_admins = {a.strip().lower() for a in configured.split(',')}
        # An unset or sloppy setting yields empty entries; without dropping
        # them an empty email address would be promoted to admin.
        auto_admins.discard('')
        if email is not None and email.strip().lower() in auto_admins:
            role.is_admin = True

        db.session.add(role)
        db.session.flush()
        return role

    @classmethod
    def load_id(cls, foreign_id, type=None, name=None):
        """Load a role and return the ID.

        If type is given and no role is found, a new role will be created.
        Resolved ids are memoised on ``current_app._authz_roles``.
        """
        if not hasattr(current_app, '_authz_roles'):
            current_app._authz_roles = {}
        if foreign_id not in current_app._authz_roles:
            role = cls.by_foreign_id(foreign_id)
            if role is None:
                if type is None:
                    return None
                name = name or foreign_id
                role = cls.load_or_create(foreign_id, type, name)
            current_app._authz_roles[foreign_id] = role.id
        return current_app._authz_roles[foreign_id]

    def __repr__(self):
        return '<Role(%r,%r)>' % (self.id, self.foreign_id)

    def __unicode__(self):
        return self.name

    def to_dict(self):
        """Extend the parent class dict with the role's public fields and
        the URL of its API view."""
        data = super(Role, self).to_dict()
        data.update({
            'api_url': url_for('roles_api.view', id=self.id),
            'foreign_id': self.foreign_id,
            'is_admin': self.is_admin,
            'email': self.email,
            'name': self.name,
            'type': self.type
        })
        return data
Пример #16
0
class Export(db.Model, IdModel, DatedModel):
    """A data export run in the background. The data is stored in a cloud
    storage bucket and the user is given a link to download the data. The link
    expires after a fixed duration and the exported data is deleted."""

    DEFAULT_EXPIRATION = timedelta(days=30)  # After 30 days

    label = db.Column(db.Unicode)
    operation = db.Column(db.Unicode)
    creator_id = db.Column(db.Integer, db.ForeignKey("role.id"))
    creator = db.relationship(Role,
                              backref=db.backref("exports", lazy="dynamic"))
    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True,
                              nullable=True)
    collection = db.relationship(Collection,
                                 backref=db.backref("exports", lazy="dynamic"))

    expires_at = db.Column(db.DateTime, default=None, nullable=True)
    deleted = db.Column(db.Boolean, default=False)
    # The attribute is ``status``; the underlying column is "export_status".
    status = db.Column("export_status", db.Unicode, default=Status.DEFAULT)

    content_hash = db.Column(db.Unicode(65), index=True, nullable=True)
    file_size = db.Column(db.BigInteger, nullable=True)  # In bytes
    file_name = db.Column(db.Unicode, nullable=True)
    mime_type = db.Column(db.Unicode)
    # A callable default gives every row its own dict instead of sharing one
    # mutable ``{}`` object between all instances.
    meta = db.Column(JSONB, default=dict)

    def to_dict(self):
        """Return the export as a plain dict.

        Starts from ``to_dict_dates()`` and adds the export's own columns.
        ``status`` is looked up in ``Status.LABEL`` and is ``None`` when the
        stored value has no entry there.
        """
        data = self.to_dict_dates()
        data.update({
            "id": stringify(self.id),
            "label": self.label,
            "operation": self.operation,
            "creator_id": stringify(self.creator_id),
            "collection_id": self.collection_id,
            "expires_at": self.expires_at,
            "deleted": self.deleted,
            "status": Status.LABEL.get(self.status),
            "content_hash": self.content_hash,
            "file_size": self.file_size,
            "file_name": self.file_name,
            "mime_type": self.mime_type,
            "meta": self.meta,
        })
        return data

    @classmethod
    def create(cls,
               operation,
               role_id,
               label,
               collection=None,
               mime_type=None,
               meta=None):
        """Build a new export and add it to the session (no commit).

        ``expires_at`` is the current UTC time plus ``DEFAULT_EXPIRATION``.

        :param operation: stored as-is in the ``operation`` column.
        :param role_id: id of the role that owns the export.
        :param label: stored as-is in the ``label`` column.
        :param collection: optional collection whose id is recorded.
        :param mime_type: stored as-is in the ``mime_type`` column.
        :param meta: optional metadata; a falsy value becomes an empty dict.
        :returns: the new, not yet flushed, ``Export``.
        """
        export = cls()
        export.creator_id = role_id
        export.operation = operation
        export.label = label
        if collection is not None:
            export.collection_id = collection.id
        export.mime_type = mime_type
        export.expires_at = datetime.utcnow() + cls.DEFAULT_EXPIRATION
        export.meta = meta or {}
        db.session.add(export)
        return export

    @property
    def namespace(self):
        """Key built from the creator's role id."""
        return make_key("role", self.creator_id)

    def set_status(self, status):
        """Store ``status`` (unvalidated) and add self to the session."""
        self.status = status
        db.session.add(self)

    def should_delete_publication(self):
        """Check whether the published export should be deleted from the archive

        Since we store exports by contenthash, there may be other non-expired exports
        that point to the same file in the archive"""
        q = Export.all()
        q = q.filter(Export.content_hash == self.content_hash)
        q = q.filter(Export.deleted.isnot(True))
        q = q.filter(Export.id != self.id)
        return q.first() is None

    @classmethod
    def get_expired(cls, deleted=False):
        """Query exports whose ``expires_at`` is not in the future.

        :param deleted: when falsy, exports flagged as deleted are excluded.
        :returns: a query object (not yet executed).
        """
        now = datetime.utcnow()
        q = cls.all()
        q = q.filter(cls.expires_at <= now)
        if not deleted:
            # Compare against False explicitly, like the other finders;
            # comparing against the argument turned ``deleted=None`` into an
            # ``IS NULL`` filter.
            q = q.filter(cls.deleted == False)  # noqa
        return q

    @classmethod
    def get_pending(cls):
        """Query non-deleted exports whose status is ``Status.PENDING``."""
        q = cls.all()
        q = q.filter(cls.status == Status.PENDING)
        q = q.filter(cls.deleted == False)  # noqa
        return q

    @classmethod
    def by_id(cls, id, role_id=None, deleted=False):
        """Return the export with the given id, or ``None``.

        :param role_id: if given, the export must belong to this role.
        :param deleted: when falsy, exports flagged as deleted are excluded.
        """
        q = cls.all().filter_by(id=id)
        if role_id is not None:
            q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        return q.first()

    @classmethod
    def by_role_id(cls, role_id, deleted=False):
        """Query the exports of a role, newest first.

        :param deleted: when falsy, exports flagged as deleted are excluded.
        :returns: a query object (not yet executed).
        """
        q = cls.all()
        q = q.filter(cls.creator_id == role_id)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        q = q.order_by(cls.created_at.desc())
        return q

    @classmethod
    def by_content_hash(cls, content_hash, deleted=False):
        """Query the exports that share a content hash.

        :param deleted: when falsy, exports flagged as deleted are excluded.
        :returns: a query object (not yet executed).
        """
        q = cls.all()
        q = q.filter(cls.content_hash == content_hash)
        if not deleted:
            q = q.filter(cls.deleted == False)  # noqa
        return q

    def __repr__(self):
        return "<Export(%r, %r, %r)>" % (self.id, self.creator_id, self.label)
Пример #17
0
class Document(db.Model, DatedModel):
    """A document or folder that belongs to a collection.

    Descriptive metadata lives in the ``meta`` JSON column; ``to_proxy``
    converts a row into an entity proxy.
    """

    SCHEMA = 'Document'
    SCHEMA_FOLDER = 'Folder'
    SCHEMA_TABLE = 'Table'

    id = db.Column(db.BigInteger, primary_key=True)
    content_hash = db.Column(db.Unicode(65), nullable=True, index=True)
    foreign_id = db.Column(db.Unicode, unique=False, nullable=True, index=True)
    schema = db.Column(db.String(255), nullable=False)
    # A callable default gives every row its own dict instead of sharing one
    # mutable ``{}`` object between all instances.
    meta = db.Column(JSONB, default=dict)

    uploader_id = db.Column(db.Integer,
                            db.ForeignKey('role.id'),
                            nullable=True)  # noqa
    parent_id = db.Column(db.BigInteger,
                          db.ForeignKey('document.id'),
                          nullable=True,
                          index=True)  # noqa
    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              nullable=False,
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('documents',
                                                    lazy='dynamic'))  # noqa

    def __init__(self, **kw):
        # Start with an empty metadata dict; keyword arguments may replace it.
        self.meta = {}
        super(Document, self).__init__(**kw)

    @property
    def model(self):
        """Schema object looked up from ``self.schema``."""
        return model.get(self.schema)

    @property
    def ancestors(self):
        """List of ancestor document ids, ending with the direct parent.

        The list is read from the cache when available (first under this
        document's key, then under the parent's); otherwise it is computed
        recursively through the parent document. The result is only written
        back to the cache when this document's schema is a folder.
        """
        if self.parent_id is None:
            return []
        key = cache.key('ancestors', self.id)
        ancestors = cache.get_list(key)
        if ancestors:
            return ancestors
        parent_key = cache.key('ancestors', self.parent_id)
        ancestors = cache.get_list(parent_key)
        if not ancestors:
            ancestors = []
            parent = Document.by_id(self.parent_id)
            if parent is not None:
                # Copy so the append below cannot touch the parent's list.
                ancestors = list(parent.ancestors)
        ancestors.append(self.parent_id)
        if self.model.is_a(model.get(self.SCHEMA_FOLDER)):
            cache.set_list(key, ancestors, expire=cache.EXPIRE)
        return ancestors

    def update(self, data):
        """Copy the known metadata properties from ``data`` into ``meta``.

        Properties missing from ``data`` keep their current value (or become
        ``None`` if they were not set before).
        """
        props = ('title', 'summary', 'author', 'crawler', 'source_url',
                 'file_name', 'mime_type', 'headers', 'date', 'authored_at',
                 'modified_at', 'published_at', 'retrieved_at', 'languages',
                 'countries', 'keywords')
        for prop in props:
            self.meta[prop] = data.get(prop, self.meta.get(prop))
        # In-place changes to the JSON dict need to be flagged explicitly.
        flag_modified(self, 'meta')

    def delete(self, deleted_at=None):
        """Delete the row through the session; ``deleted_at`` is unused."""
        db.session.delete(self)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Bulk-delete every document of the given collection."""
        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq.delete(synchronize_session=False)

    @classmethod
    def save(cls,
             collection,
             parent=None,
             foreign_id=None,
             content_hash=None,
             meta=None,
             uploader_id=None):
        """Find a matching document in the collection or create a new one.

        The lookup is restricted to ``parent`` when given and uses
        ``foreign_id`` if present, otherwise ``content_hash``. The document
        is added to the session but not committed.

        :param meta: optional dict passed to ``update``.
        :param uploader_id: only applied to newly created documents.
        :raises ValueError: if neither ``foreign_id`` nor ``content_hash``
            is given.
        :returns: the existing or newly created document.
        """
        q = cls.all()
        q = q.filter(Document.collection_id == collection.id)

        if parent is not None:
            q = q.filter(Document.parent_id == parent.id)
        if foreign_id is not None:
            q = q.filter(Document.foreign_id == foreign_id)
        elif content_hash is not None:
            q = q.filter(Document.content_hash == content_hash)
        else:
            raise ValueError("No unique criterion for document.")

        document = q.first()
        if document is None:
            document = cls()
            document.schema = cls.SCHEMA
            document.collection_id = collection.id
            document.uploader_id = uploader_id

        if parent is not None:
            document.parent_id = parent.id

        if foreign_id is not None:
            document.foreign_id = foreign_id

        document.content_hash = content_hash
        # A document without a content hash is given the folder schema.
        if content_hash is None:
            document.schema = cls.SCHEMA_FOLDER

        if meta is not None:
            document.update(meta)

        db.session.add(document)
        return document

    @classmethod
    def by_id(cls, id, collection_id=None):
        """Return the document with the given id, or ``None``.

        ``None`` is also returned when ``id`` cannot be converted to an
        integer.

        :param collection_id: if given, the document must belong to it.
        """
        try:
            id = int(id)
        except (TypeError, ValueError, OverflowError):
            return None
        q = cls.all()
        q = q.filter(cls.id == id)
        if collection_id is not None:
            q = q.filter(cls.collection_id == collection_id)
        return q.first()

    @classmethod
    def by_collection(cls, collection_id=None):
        """Query all documents of a collection (query not yet executed)."""
        q = cls.all()
        q = q.filter(cls.collection_id == collection_id)
        return q

    @classmethod
    def cleanup_deleted(cls):
        """Bulk-delete the documents of all soft-deleted collections."""
        q = db.session.query(Collection.id)
        q = q.filter(Collection.deleted_at != None)  # noqa
        collection_ids = [c for (c, ) in q.all()]
        if not collection_ids:
            # Nothing to clean up; avoid a DELETE with an empty IN clause.
            return
        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id.in_(collection_ids))
        pq.delete(synchronize_session=False)

    def to_proxy(self):
        """Build an entity proxy from the document and its metadata.

        File name and MIME type fall back to the ``content_disposition`` and
        ``content_type`` headers when the metadata has no value for them.
        """
        proxy = model.get_proxy({
            'id': str(self.id),
            'schema': self.model,
            'properties': {}
        })
        # Work on a copy so popping the headers leaves the column untouched.
        meta = dict(self.meta or {})
        headers = meta.pop('headers', {}) or {}
        headers = {slugify(k, sep='_'): v for k, v in headers.items()}
        proxy.set('contentHash', self.content_hash)
        proxy.set('parent', self.parent_id)
        proxy.set('ancestors', self.ancestors)
        proxy.set('crawler', meta.get('crawler'))
        proxy.set('sourceUrl', meta.get('source_url'))
        proxy.set('title', meta.get('title'))
        proxy.set('fileName', meta.get('file_name'))
        if not proxy.has('fileName'):
            disposition = headers.get('content_disposition')
            if disposition is not None:
                _, attrs = cgi.parse_header(disposition)
                proxy.set('fileName', attrs.get('filename'))
        proxy.set('mimeType', meta.get('mime_type'))
        if not proxy.has('mimeType'):
            proxy.set('mimeType', headers.get('content_type'))
        proxy.set('language', meta.get('languages'))
        proxy.set('country', meta.get('countries'))
        proxy.set('keywords', meta.get('keywords'))
        proxy.set('headers', registry.json.pack(headers), quiet=True)
        proxy.set('authoredAt', meta.get('authored_at'))
        proxy.set('modifiedAt', meta.get('modified_at'))
        proxy.set('publishedAt', meta.get('published_at'))
        proxy.set('retrievedAt', meta.get('retrieved_at'))
        proxy.set('indexUpdatedAt', self.created_at)
        return proxy

    def __repr__(self):
        return '<Document(%r,%r)>' % (self.id, self.schema)
Пример #18
0
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """A named entity belonging to a collection.

    The name is stored in its own column; all other properties live in the
    ``data`` JSON column and the schema name in ``type``.
    """

    STATE_ACTIVE = 'active'
    STATE_PENDING = 'pending'
    STATE_DELETED = 'deleted'

    name = db.Column(db.Unicode)
    type = db.Column(db.String(255), index=True)
    state = db.Column(db.String(128),
                      nullable=True,
                      default=STATE_ACTIVE,
                      index=True)  # noqa
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    def delete_references(self, origin=None):
        """Delete the references pointing at this entity.

        :param origin: if given, only references with this origin are removed.
        """
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.entity_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_identities(self):
        """Delete the identity records attached to this entity."""
        pq = db.session.query(EntityIdentity)
        pq = pq.filter(EntityIdentity.entity_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Soft-delete the entity together with its references, identities
        and alerts.

        :param deleted_at: deletion timestamp; defaults to the current UTC
            time.
        """
        self.delete_references()
        self.delete_identities()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        self.state = self.STATE_DELETED
        super(Entity, self).delete(deleted_at=deleted_at)

    @classmethod
    def delete_dangling(cls, collection_id):
        """Delete dangling entities.

        Entities can dangle in pending state while they have no references
        pointing to them, thus making it impossible to enable them. This is
        a routine cleanup function.
        """
        q = db.session.query(cls)
        q = q.filter(cls.collection_id == collection_id)
        q = q.filter(cls.state == cls.STATE_PENDING)
        q = q.outerjoin(Reference)
        q = q.group_by(cls)
        q = q.having(func.count(Reference.id) == 0)
        for entity in q.all():
            entity.delete()

    def merge(self, other):
        """Merge ``other`` into this entity and delete ``other``.

        Data and foreign ids are combined, the other entity's name is kept
        as an alias when it differs, and alerts and references are
        re-pointed to this entity. The session is committed.

        :raises ValueError: if ``other`` is this entity or belongs to a
            different collection.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        data = merge_data(self.data, other.data)
        # Names are nullable, so guard against None before comparing.
        if other.name and (self.name or '').lower() != other.name.lower():
            data = merge_data(data, {'alias': [other.name]})

        self.data = data
        self.state = self.STATE_ACTIVE
        self.foreign_ids = self.foreign_ids or []
        self.foreign_ids += other.foreign_ids or []
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({'entity_id': self.id})

        # update document references
        from aleph.model.reference import Reference
        q = db.session.query(Reference).filter(Reference.entity_id == other.id)
        q.update({'entity_id': self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)

    def update(self, entity):
        """Apply the values of the ``entity`` dict to this row.

        Reads the keys ``data``, ``name``, ``foreign_ids`` and ``state``;
        the data is validated against the entity's schema. The input dict is
        not modified.
        """
        # Copy the nested dict so adding the name does not leak to the caller.
        data = dict(entity.get('data') or {})
        data['name'] = entity.get('name')
        self.data = self.schema.validate(data)
        self.name = self.data.pop('name')
        fid = [string_value(f) for f in entity.get('foreign_ids') or []]
        self.foreign_ids = list(set([f for f in fid if f is not None]))
        self.state = entity.get('state', self.STATE_ACTIVE)
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def save(cls, data, collection, merge=False):
        """Create or update an entity from ``data``.

        An existing entity is looked up by ``data['id']``; otherwise a new
        one is created with the schema named in ``data['schema']``.

        :param merge: when true, ``data`` is combined with the entity's
            current dict through ``merge_data`` before it is applied.
        :raises ValueError: if a new entity has no schema, or if
            ``collection`` is ``None``.
        :returns: the saved entity.
        """
        ent = cls.by_id(data.get('id'))
        if ent is None:
            ent = cls()
            # Read rather than pop, so the caller's dict stays intact.
            ent.type = data.get('schema')
            if ent.type is None:
                raise ValueError("No schema provided.")
            ent.id = make_textid()

        if merge:
            data = merge_data(data, ent.to_dict())

        if collection is None:
            raise ValueError("No collection specified.")

        ent.collection = collection
        ent.update(data)
        return ent

    @classmethod
    def filter_collections(cls, q, collections=None):
        """Restrict ``q`` to the given collections.

        :param collections: iterable of ``Collection`` objects or ids;
            ``None`` leaves the query unchanged.
        """
        if collections is None:
            return q
        collection_ids = []
        for collection in collections:
            if isinstance(collection, Collection):
                collection = collection.id
            collection_ids.append(collection)
        q = q.filter(Entity.collection_id.in_(collection_ids))
        return q

    @classmethod
    def by_id_set(cls, ids, collections=None):
        """Return a dict mapping id to entity for the given ids.

        :param collections: optional restriction, see
            ``filter_collections``.
        """
        if not len(ids):
            return {}
        q = cls.all()
        q = cls.filter_collections(q, collections=collections)
        q = q.options(joinedload('collection'))
        q = q.filter(cls.id.in_(ids))
        return {ent.id: ent for ent in q}

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Return an entity of the collection whose ``foreign_ids`` contain
        ``foreign_id``, or ``None``.

        Entities without a deletion timestamp are preferred.
        """
        foreign_id = string_value(foreign_id)
        if foreign_id is None:
            return None
        q = cls.all(deleted=deleted)
        q = q.filter(Entity.collection_id == collection_id)
        foreign_id = func.cast([foreign_id], ARRAY(db.Unicode()))
        q = q.filter(cls.foreign_ids.contains(foreign_id))
        q = q.order_by(Entity.deleted_at.desc().nullsfirst())
        return q.first()

    @classmethod
    def latest(cls):
        """Most recent ``updated_at`` among active entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.state == cls.STATE_ACTIVE)
        return q.scalar()

    @property
    def schema(self):
        """Schema object looked up from ``self.type``."""
        return schemata.get(self.type)

    @property
    def terms(self):
        """Set made of the name plus every non-empty alias."""
        terms = set([self.name])
        for alias in ensure_list(self.data.get('alias')):
            if alias is not None and len(alias):
                terms.add(alias)
        return terms

    @property
    def regex_terms(self):
        """Normalised terms worth searching for this entity.

        Only terms of 4 to 120 characters are eligible, and an eligible term
        is dropped when it contains another eligible term.
        """
        # This is to find the shortest possible regex for each entity.
        # If, for example, an entity matches both "Al Qaeda" and
        # "Al Qaeda in Iraq, Syria and the Levant", it is useless to
        # search for the latter.
        terms = set([normalize_strong(t) for t in self.terms])
        candidates = set()
        for term in terms:
            if term is None or len(term) < 4 or len(term) > 120:
                continue
            candidates.add(term)
        # Compare eligible terms only against each other: a term that is
        # itself not searched for (for instance an empty or very short one)
        # must not suppress a longer term that is.
        regex_terms = set()
        for term in candidates:
            contained = any(other != term and other in term
                            for other in candidates)
            if not contained:
                regex_terms.add(term)
        return regex_terms

    def to_dict(self):
        """Extend the parent class dict with the entity's own fields."""
        data = super(Entity, self).to_dict()
        data.update({
            'schema': self.type,
            'name': self.name,
            'state': self.state,
            'data': self.data,
            'foreign_ids': self.foreign_ids or [],
            'collection_id': self.collection_id
        })
        return data

    def to_index(self):
        """Return ``to_dict()`` plus a ``properties`` dict in which every
        non-empty data value, and the name, is a list."""
        entity = self.to_dict()
        entity['properties'] = {'name': [self.name]}
        for k, v in self.data.items():
            v = ensure_list(v)
            if len(v):
                entity['properties'][k] = v
        return entity

    def to_ref(self):
        """Return a short reference dict (id, label, schema, collection)."""
        return {
            'id': self.id,
            'label': self.name,
            'schema': self.type,
            'collection_id': self.collection_id
        }

    def __unicode__(self):
        return self.name

    def __repr__(self):
        return '<Entity(%r, %r)>' % (self.id, self.name)
Пример #19
0
class Document(db.Model, DatedModel):
    """A document that belongs to a collection.

    Besides its own columns the row carries a JSONB ``meta`` blob which is
    exposed as a ``Metadata`` object through the ``meta`` hybrid property.
    Page, record and reference rows pointing at the document are removed
    together with it (see ``delete``).
    """
    _schema = 'document.json#'

    SCHEMA = 'Document'

    # Values stored in the ``type`` column.
    TYPE_TEXT = 'text'
    TYPE_TABULAR = 'tabular'
    TYPE_OTHER = 'other'

    # Values stored in the ``status`` column.
    STATUS_PENDING = 'pending'
    STATUS_SUCCESS = 'success'
    STATUS_FAIL = 'fail'

    id = db.Column(db.BigInteger, primary_key=True)
    content_hash = db.Column(db.Unicode(65), nullable=False, index=True)
    foreign_id = db.Column(db.Unicode, unique=False, nullable=True)
    type = db.Column(db.Unicode(10), nullable=False, index=True)
    status = db.Column(db.Unicode(10), nullable=True, index=True)
    # Raw metadata blob; use the ``meta`` property to read or write it.
    _meta = db.Column('meta', JSONB)

    crawler = db.Column(db.Unicode(), index=True)
    crawler_run = db.Column(db.Unicode())
    error_type = db.Column(db.Unicode(), nullable=True)
    error_message = db.Column(db.Unicode(), nullable=True)
    error_details = db.Column(db.Unicode(), nullable=True)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              nullable=False,
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('documents',
                                                    lazy='dynamic'))  # noqa

    @property
    def title(self):
        """Title taken from the document metadata."""
        return self.meta.title

    @hybrid_property
    def meta(self):
        """Return the metadata blob wrapped in a ``Metadata`` object.

        Before wrapping, the copies of ``content_hash``, ``foreign_id``,
        ``crawler`` and ``crawler_run`` inside the blob are overwritten
        with the values of the corresponding columns.
        """
        self._meta = self._meta or {}
        self._meta['content_hash'] = self.content_hash
        self._meta['foreign_id'] = self.foreign_id
        self._meta['crawler'] = self.crawler
        self._meta['crawler_run'] = self.crawler_run
        return Metadata.from_data(self._meta or {})

    @meta.setter
    def meta(self, meta):
        """Store new metadata.

        A ``Metadata`` instance also updates the mirrored columns and is
        converted with ``to_attr_dict()``; any other value is stored as is.
        """
        if isinstance(meta, Metadata):
            self.content_hash = meta.content_hash
            self.foreign_id = meta.foreign_id
            self.crawler = meta.crawler
            self.crawler_run = meta.crawler_run
            meta = meta.to_attr_dict()
        self._meta = meta
        # In-place changes of the JSON value are not tracked automatically.
        flag_modified(self, '_meta')

    def update(self, data):
        """Validate ``data`` against the schema and merge it into the metadata."""
        validate(data, self._schema)
        meta = self.meta
        meta.update(data, safe=True)
        self.meta = meta
        db.session.add(self)

    def delete_pages(self):
        """Delete all ``DocumentPage`` rows of this document."""
        pq = db.session.query(DocumentPage)
        pq = pq.filter(DocumentPage.document_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_records(self):
        """Delete all ``DocumentRecord`` rows of this document."""
        pq = db.session.query(DocumentRecord)
        pq = pq.filter(DocumentRecord.document_id == self.id)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete_references(self, origin=None):
        """Delete ``Reference`` rows of this document.

        When ``origin`` is given only references with that origin are removed.
        """
        pq = db.session.query(Reference)
        pq = pq.filter(Reference.document_id == self.id)
        if origin is not None:
            pq = pq.filter(Reference.origin == origin)
        pq.delete(synchronize_session='fetch')
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Delete the document together with its references, records and pages.

        ``deleted_at`` is accepted but not used by this method.
        """
        self.delete_references()
        self.delete_records()
        self.delete_pages()
        db.session.delete(self)

    def insert_records(self, sheet, iterable, chunk_size=1000):
        """Bulk insert ``DocumentRecord`` rows for one sheet.

        Each item of ``iterable`` becomes the ``data`` of one record whose
        ``row_id`` is its position in the iterable. Rows are written in
        batches of ``chunk_size``.
        """
        chunk = []
        for i, data in enumerate(iterable):
            chunk.append({
                'document_id': self.id,
                'row_id': i,
                'sheet': sheet,
                'data': data
            })
            if len(chunk) >= chunk_size:
                db.session.bulk_insert_mappings(DocumentRecord, chunk)
                chunk = []

        # Flush the last, partially filled batch.
        if len(chunk):
            db.session.bulk_insert_mappings(DocumentRecord, chunk)

    def text_parts(self):
        """Yield the text snippets of the pages (text) or records (tabular).

        Documents of any other type yield nothing.
        """
        if self.type == self.TYPE_TEXT:
            for page in self.pages:
                for text in page.text_parts():
                    yield text
        elif self.type == self.TYPE_TABULAR:
            for record in self.records:
                for text in record.text_parts():
                    yield text

    @classmethod
    def crawler_last_run(cls, crawler_id):
        """Return the latest ``updated_at`` of the crawler's documents, or None."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.crawler == crawler_id)
        return q.scalar()

    @classmethod
    def is_crawler_active(cls, crawler_id):
        """Tell whether the crawler touched a document within the last hour."""
        # TODO: add a function to see if a particular crawl is still running
        # this should be defined as having "pending" documents.
        last_run_time = cls.crawler_last_run(crawler_id)
        if last_run_time is None:
            return False
        return last_run_time > (datetime.utcnow() - timedelta(hours=1))

    @classmethod
    def crawler_stats(cls, crawler_id):
        """Return a dict with ``updated``, ``running`` and per-status counts."""
        # Check if the crawler was active very recently, if so, don't
        # allow the user to execute a new run right now.
        stats = {
            'updated': cls.crawler_last_run(crawler_id),
            'running': cls.is_crawler_active(crawler_id)
        }

        q = db.session.query(cls.status, func.count(cls.id))
        q = q.filter(cls.crawler == crawler_id)
        q = q.group_by(cls.status)
        for (status, count) in q.all():
            stats[status] = count
        return stats

    def _add_to_dict(self, data):
        """Add the column values and the ``public`` flag to ``data``.

        ``public`` is looked up through the authz object of the current
        flask request. This is best effort: if the lookup fails for any
        reason the flag is set to ``None``.
        """
        try:
            from flask import request
            source_id = self.collection_id
            data['public'] = request.authz.collection_public(source_id)
        except Exception:
            # Do not swallow KeyboardInterrupt / SystemExit like a bare
            # ``except`` would.
            data['public'] = None
        data.update({
            'id': self.id,
            'type': self.type,
            'status': self.status,
            'error_type': self.error_type,
            'error_message': self.error_message,
            'error_details': self.error_details,
            'collection_id': self.collection_id,
            'created_at': self.created_at,
            'updated_at': self.updated_at
        })
        return data

    def to_dict(self):
        """Serialise the metadata plus the document columns."""
        data = self.meta.to_dict()
        return self._add_to_dict(data)

    def to_index_dict(self):
        """Serialise the document for indexing, including text and latinised fields."""
        data = self.meta.to_index_dict()
        data['text'] = index_form(self.text_parts())
        data['schema'] = self.SCHEMA
        data['schemata'] = [self.SCHEMA]
        data['name_sort'] = ascii_text(data.get('title'))
        data['title_latin'] = ascii_text(data.get('title'))
        data['summary_latin'] = ascii_text(data.get('summary'))
        return self._add_to_dict(data)

    def __repr__(self):
        return '<Document(%r,%r,%r)>' % (self.id, self.type, self.title)
Пример #20
0
class Alert(db.Model, SoftDeleteModel):
    """A role's subscription to notifications for a stored query."""
    __tablename__ = 'alert'

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column(db.Unicode, nullable=True)
    notified_at = db.Column(db.DateTime, nullable=True)

    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(Role, backref=db.backref('alerts',
                                                    lazy='dynamic'))  # noqa

    @property
    def normalized(self):
        """The query text passed through ``normalize``."""
        return normalize(self.query)

    def delete(self, deleted_at=None):
        """Soft-delete the alert and flush; defaults to the current UTC time."""
        if not deleted_at:
            deleted_at = datetime.utcnow()
        self.deleted_at = deleted_at
        db.session.add(self)
        db.session.flush()

    def update(self):
        """Stamp ``notified_at`` with the current UTC time and flush."""
        self.notified_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()

    def is_same(self, other):
        """Two alerts are the same if role and normalised query both match."""
        differs = (other.role_id != self.role_id
                   or other.normalized != self.normalized)
        return not differs

    @classmethod
    def by_id(cls, id, role_id=None):
        """Fetch an alert by id, optionally restricted to one role."""
        query = cls.all().filter_by(id=id)
        if role_id is None:
            return query.first()
        return query.filter(cls.role_id == role_id).first()

    @classmethod
    def by_role_id(cls, role_id):
        """Query the alerts of a role, newest first (ties broken by id)."""
        return (cls.all()
                .filter(cls.role_id == role_id)
                .order_by(cls.created_at.desc())
                .order_by(cls.id.desc()))

    @classmethod
    def create(cls, data, role_id):
        """Create an alert for ``role_id`` from the ``query`` key of ``data``."""
        created = cls()
        created.role_id = role_id
        created.query = stringify(data.get('query'))
        created.update()
        return created

    @classmethod
    def dedupe(cls):
        """Delete the lower-id alert of every pair of identical alerts."""
        for left, right in permutations(cls.all(), 2):
            # Look at each unordered pair once, with the lower id on the left.
            if left.id < right.id and left.is_same(right):
                left.delete()

    def __repr__(self):
        fields = (self.id, self.query)
        return '<Alert(%r, %r)>' % fields
Пример #21
0
class Collection(db.Model, IdModel, SoftDeleteModel):
    """A group of documents and entities that shares one set of access
    control rules."""

    # Known collection categories, keyed by their stored value.
    # TODO: add extra weight info.
    # TODO: should this be configurable?
    CATEGORIES = {
        'news': 'News archives',
        'leak': 'Leaks',
        'land': 'Land registry',
        'gazette': 'Gazettes',
        'court': 'Court archives',
        'company': 'Company registries',
        'watchlist': 'Watchlists',
        'investigation': 'Personal collections',
        'sanctions': 'Sanctions lists',
        'scrape': 'Scrapes',
        'procurement': 'Procurement',
        'grey': 'Grey literature',
        'license': 'Licenses and concessions',
        'regulatory': 'Regulatory filings',
        'other': 'Other material'
    }

    label = db.Column(db.Unicode)
    summary = db.Column(db.Unicode, nullable=True)
    category = db.Column(db.Unicode, nullable=True)
    countries = db.Column(ARRAY(db.Unicode()), default=[])
    languages = db.Column(ARRAY(db.Unicode()), default=[])
    foreign_id = db.Column(db.Unicode, unique=True, nullable=False)

    # Managed collections are generated by API crawlers and thus UI users
    # shouldn't be allowed to add entities or documents to them. They also
    # don't use advanced entity extraction features for performance reasons.
    managed = db.Column(db.Boolean, default=False)

    creator_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    creator = db.relationship(Role)

    def update(self, data, creator=None):
        """Apply ``data`` to the collection and flush it.

        ``label``, ``summary`` and ``category`` keep their value when absent
        from ``data``; ``managed`` and ``countries`` fall back to ``False``
        and ``[]``. Without an explicit ``creator`` the role is looked up
        from ``data['creator_id']``; a creator that is found is granted read
        and write permission.
        """
        for field in ('label', 'summary', 'category'):
            setattr(self, field, data.get(field, getattr(self, field)))
        self.managed = data.get('managed', False)
        self.countries = data.get('countries', [])
        if creator is None:
            creator = Role.by_id(data.get('creator_id'))
        self.creator = creator
        self.updated_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()
        if creator is None:
            return
        Permission.grant(self, creator, True, True)

    @property
    def roles(self):
        """Ids of the roles with an active read permission (cached on the
        instance after the first access)."""
        if hasattr(self, '_roles'):
            return self._roles
        rows = (db.session.query(Permission.role_id)
                .filter(Permission.deleted_at == None)  # noqa
                .filter(Permission.collection_id == self.id)
                .filter(Permission.read == True)  # noqa
                .all())
        self._roles = [row.role_id for row in rows]
        return self._roles

    @classmethod
    def by_foreign_id(cls, foreign_id, deleted=False):
        """Find a collection by foreign id; ``None`` when no id is given."""
        if foreign_id is None:
            return None
        matching = cls.all(deleted=deleted).filter(cls.foreign_id == foreign_id)
        return matching.first()

    @classmethod
    def all_by_ids(cls, ids, deleted=False, authz=None):
        """Query collections by id, limited to what ``authz`` may read.

        No restriction is applied without ``authz`` or for admins.
        """
        q = super(Collection, cls).all_by_ids(ids, deleted=deleted)
        if authz is None or authz.is_admin:
            return q
        q = q.join(Permission, cls.id == Permission.collection_id)
        q = q.filter(Permission.deleted_at == None)  # noqa
        q = q.filter(Permission.read == True)  # noqa
        return q.filter(Permission.role_id.in_(authz.roles))

    @classmethod
    def create(cls, data, role=None):
        """Create a collection, or restore the one with the same foreign id.

        An existing (possibly soft-deleted) collection is only un-deleted;
        ``data`` is applied to newly created collections only.
        """
        foreign_id = data.get('foreign_id') or make_textid()
        found = cls.by_foreign_id(foreign_id, deleted=True)
        if found is None:
            found = cls()
            found.foreign_id = foreign_id
            found.update(data, creator=role)
        found.deleted_at = None
        return found

    def __repr__(self):
        fields = (self.id, self.foreign_id, self.label)
        return '<Collection(%r, %r, %r)>' % fields
Пример #22
0
class Entity(db.Model, DatedModel):
    """An entity stored in a collection; its properties live in ``data``."""
    THING = "Thing"
    LEGAL_ENTITY = "LegalEntity"

    id = db.Column(
        db.String(ENTITY_ID_LEN),
        primary_key=True,
        default=make_textid,
        nullable=False,
        unique=False,
    )
    schema = db.Column(db.String(255), index=True)
    data = db.Column("data", JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"),
                        nullable=True)  # noqa
    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True)
    collection = db.relationship(Collection,
                                 backref=db.backref("entities",
                                                    lazy="dynamic"))

    @property
    def model(self):
        """Schema object looked up from ``model`` by the stored schema name."""
        return model.get(self.schema)

    def update(self, data, collection):
        """Replace the entity's schema and properties with those in ``data``.

        The incoming proxy and the entity id are passed through the
        collection namespace. Checksum properties are reset to the values
        stored so far.
        """
        incoming = collection.ns.apply(model.get_proxy(data, cleaned=False))
        self.id = collection.ns.sign(self.id)
        self.schema = incoming.schema.name
        self.updated_at = datetime.utcnow()
        # Snapshot of the stored state, taken before ``data`` is replaced.
        stored = self.to_proxy()
        for prop in incoming.schema.properties.values():
            # Do not allow the user to overwrite hashes because this could
            # lead to a user accessing random objects.
            if prop.type == registry.checksum:
                incoming.set(prop, stored.get(prop), cleaned=True, quiet=True)
        self.data = incoming.properties
        db.session.add(self)

    def to_proxy(self):
        """Build an entity proxy from the stored columns."""
        payload = dict(
            id=self.id,
            schema=self.schema,
            properties=self.data,
            created_at=iso_text(self.created_at),
            updated_at=iso_text(self.updated_at),
            role_id=self.role_id,
            mutable=True,
        )
        return model.get_proxy(payload, cleaned=False)

    @classmethod
    def create(cls, data, collection, role_id=None):
        """Create an entity in ``collection`` from ``data``.

        Uses ``data['id']`` or a fresh text id; raises ``InvalidData`` when
        the id does not validate.
        """
        created = cls()
        raw_id = data.get("id") or make_textid()
        if not registry.entity.validate(raw_id):
            raise InvalidData(gettext("Invalid entity ID"))
        created.id = collection.ns.sign(raw_id)
        created.collection_id = collection.id
        created.role_id = role_id
        created.update(data, collection)
        return created

    @classmethod
    def by_id(cls, entity_id, collection=None):
        """Fetch an entity by id, optionally restricted to a collection."""
        query = cls.all().filter(cls.id == entity_id)
        if collection is None:
            return query.first()
        return query.filter(cls.collection_id == collection.id).first()

    @classmethod
    def by_collection(cls, collection_id):
        """Query the entities of a collection, fetched in batches of 5000."""
        query = cls.all().filter(Entity.collection_id == collection_id)
        return query.yield_per(5000)

    @classmethod
    def delete_by_collection(cls, collection_id):
        """Delete every entity row of the given collection."""
        doomed = db.session.query(cls).filter(
            cls.collection_id == collection_id)
        doomed.delete(synchronize_session=False)

    def __repr__(self):
        fields = (self.id, self.schema)
        return "<Entity(%r, %r)>" % fields
Пример #23
0
class Collection(db.Model, IdModel, SoftDeleteModel):
    """A set of documents and entities against which access control is
    enforced."""
    label = db.Column(db.Unicode)
    summary = db.Column(db.Unicode, nullable=True)
    category = db.Column(db.Unicode, nullable=True)
    countries = db.Column(ARRAY(db.Unicode()), default=[])
    languages = db.Column(ARRAY(db.Unicode()), default=[])
    foreign_id = db.Column(db.Unicode, unique=True, nullable=False)

    # Managed collections are generated by API crawlers and thus UI users
    # shouldn't be allowed to add entities or documents to them. They also
    # don't use advanced entity extraction features for performance reasons.
    managed = db.Column(db.Boolean, default=False)

    creator_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    creator = db.relationship(Role)

    def update(self, data):
        """Apply ``data`` to the collection.

        ``label``, ``summary`` and ``category`` keep their value when absent
        from ``data``; ``managed`` and ``countries`` fall back to ``False``
        and ``[]``. The creator is taken from ``data['creator']['id']``.
        """
        self.label = data.get('label', self.label)
        self.summary = data.get('summary', self.summary)
        self.category = data.get('category', self.category)
        self.managed = data.get('managed', False)
        self.countries = data.get('countries', [])
        creator = data.get('creator') or {}
        self.update_creator(creator.get('id'))
        self.touch()

    def update_creator(self, role):
        """Set the creator (and admin) of a collection.

        ``role`` may be a ``Role`` or a role id. Nothing happens unless it
        resolves to a role of type ``Role.USER``.
        """
        if not isinstance(role, Role):
            role = Role.by_id(role)
        if role is None or role.type != Role.USER:
            return
        self.creator = role
        db.session.add(self)
        # Flush before granting, as the original code does (presumably so
        # the collection has an id for the permission; verify).
        db.session.flush()
        Permission.grant(self, role, True, True)

    def touch(self):
        """Mark the collection as updated now."""
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def delete_matches(self):
        """Delete all ``Match`` rows that involve this collection on either side."""
        pq = db.session.query(Match)
        pq = pq.filter(
            or_(Match.collection_id == self.id,
                Match.match_collection_id == self.id))
        pq.delete(synchronize_session=False)

    def delete_permissions(self, deleted_at=None):
        """Soft-delete all permissions of this collection.

        ``deleted_at`` defaults to the current UTC time. Writing ``None``
        would clear the deletion marker instead of setting it.
        """
        deleted_at = deleted_at or datetime.utcnow()
        pq = db.session.query(Permission)
        pq = pq.filter(Permission.collection_id == self.id)
        pq.update({Permission.deleted_at: deleted_at},
                  synchronize_session=False)

    def delete(self, deleted_at=None):
        """Delete matches and soft-delete the permissions and the collection.

        The timestamp is resolved once so that the permissions and the
        collection carry the same ``deleted_at``.
        """
        deleted_at = deleted_at or datetime.utcnow()
        self.delete_matches()
        self.delete_permissions(deleted_at=deleted_at)
        super(Collection, self).delete(deleted_at=deleted_at)

    @property
    def roles(self):
        """Ids of the roles holding a read permission (cached on the instance)."""
        if not hasattr(self, '_roles'):
            q = db.session.query(Permission.role_id)
            q = q.filter(Permission.collection_id == self.id)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            self._roles = [e.role_id for e in q.all()]
        return self._roles

    @classmethod
    def by_foreign_id(cls, foreign_id, deleted=False):
        """Find a collection by foreign id; ``None`` when no id is given."""
        if foreign_id is None:
            return
        q = cls.all(deleted=deleted)
        return q.filter(cls.foreign_id == foreign_id).first()

    @classmethod
    def all_by_ids(cls, ids, deleted=False, authz=None):
        """Query collections by id, limited to what ``authz`` may read.

        No restriction is applied without ``authz`` or for admins.
        """
        q = super(Collection, cls).all_by_ids(ids, deleted=deleted)
        if authz is not None and not authz.is_admin:
            q = q.join(Permission, cls.id == Permission.collection_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            q = q.filter(Permission.role_id.in_(authz.roles))
        return q

    @classmethod
    def create(cls, data, role=None):
        """Create a collection, or restore the one with the same foreign id.

        An existing (possibly soft-deleted) collection is only un-deleted;
        ``data`` and ``role`` are applied to newly created collections only.
        """
        foreign_id = data.get('foreign_id') or make_textid()
        collection = cls.by_foreign_id(foreign_id, deleted=True)
        if collection is None:
            collection = cls()
            collection.foreign_id = foreign_id
            collection.update(data)
            collection.update_creator(role)
        collection.deleted_at = None
        return collection

    def __repr__(self):
        return '<Collection(%r, %r, %r)>' % \
            (self.id, self.foreign_id, self.label)
Пример #24
0
class EntitySet(db.Model, SoftDeleteModel):
    """A named set of entities (list, diagram, timeline or profile) that
    belongs to a collection and a role. Membership is stored as
    ``EntitySetItem`` rows."""
    __tablename__ = "entityset"

    # set types
    LIST = "list"
    DIAGRAM = "diagram"
    TIMELINE = "timeline"
    PROFILE = "profile"

    TYPES = frozenset([LIST, DIAGRAM, TIMELINE, PROFILE])

    id = db.Column(db.String(ENTITY_ID_LEN), primary_key=True)
    label = db.Column(db.Unicode)
    type = db.Column(db.String(10), index=True, default=LIST)
    summary = db.Column(db.Unicode, nullable=True)
    layout = db.Column("layout", JSONB, nullable=True)

    role_id = db.Column(db.Integer, db.ForeignKey("role.id"), index=True)
    role = db.relationship(Role)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey("collection.id"),
                              index=True)
    collection = db.relationship(Collection)

    parent_id = db.Column(db.String(ENTITY_ID_LEN),
                          db.ForeignKey("entityset.id"))
    parent = db.relationship("EntitySet", backref="children", remote_side=[id])

    @property
    def entities(self):
        """Ids of the entities with a positive judgement in non-deleted items."""
        q = db.session.query(EntitySetItem.entity_id)
        q = q.filter(EntitySetItem.entityset_id == self.id)
        q = q.filter(EntitySetItem.judgement == Judgement.POSITIVE)
        q = q.filter(EntitySetItem.deleted_at == None)  # noqa
        return [entity_id for entity_id, in q.all()]

    @classmethod
    def create(cls, data, collection, authz):
        """Create an entity set in ``collection`` owned by ``authz.id``."""
        entityset = cls()
        entityset.id = make_textid()
        entityset.layout = {}
        entityset.role_id = authz.id
        entityset.collection_id = collection.id
        entityset.update(data)
        return entityset

    @classmethod
    def by_authz(cls, authz, types=None, prefix=None):
        """Returns EntitySets in collections readable by ``authz``,
        optionally filtered by type and label prefix."""
        ids = authz.collections(authz.READ)
        q = cls.by_type(types)
        q = q.filter(cls.collection_id.in_(ids))
        if prefix is not None:
            q = q.filter(query_like(cls.label, prefix))
        return q

    @classmethod
    def by_type(cls, types):
        """Returns EntitySets of a particular type"""
        q = EntitySet.all()
        types = ensure_list(types)
        # NOTE(review): ``types`` is presumably a list here and so never
        # compares equal to the frozenset ``TYPES``; the filter is applied
        # whenever any type is given. Confirm whether that is intended.
        if len(types) and types != cls.TYPES:
            q = q.filter(EntitySet.type.in_(types))
        return q

    @classmethod
    def by_collection_id(cls, collection_id, types=None):
        """Returns EntitySets within a given collection_id"""
        q = cls.by_type(types)
        q = q.filter(EntitySet.collection_id == collection_id)
        return q

    @classmethod
    def by_entity_id(cls,
                     entity_id,
                     collection_ids=None,
                     judgements=None,
                     types=None,
                     labels=None):
        """Returns EntitySets that include EntitySetItems with the provided entity_id.

        NOTE: This only considers EntitySetItems who haven't been deleted
        """
        q = cls.by_type(types)
        if labels is not None:
            q = q.filter(EntitySet.label.in_(ensure_list(labels)))
        q = q.join(EntitySetItem)
        q = q.filter(EntitySetItem.deleted_at == None)  # NOQA
        q = q.filter(EntitySetItem.entity_id == entity_id)
        if collection_ids:
            q = q.filter(EntitySet.collection_id.in_(collection_ids))
        if judgements is not None:
            q = q.filter(EntitySetItem.judgement.in_(ensure_list(judgements)))
        return q

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at):
        """Delete the items and soft-delete the sets of a collection."""
        EntitySetItem.delete_by_collection(collection_id)

        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq = pq.filter(cls.deleted_at == None)  # noqa
        pq.update({cls.deleted_at: deleted_at}, synchronize_session=False)

    def items(self, authz=None, deleted=False):
        """Query the items of this set, oldest first.

        With ``authz`` only items in readable collections are returned.
        """
        q = EntitySetItem.all(deleted=deleted)
        if authz is not None:
            ids = authz.collections(authz.READ)
            q = q.filter(EntitySetItem.collection_id.in_(ids))
        q = q.filter(EntitySetItem.entityset_id == self.id)
        q = q.order_by(EntitySetItem.created_at.asc())
        return q

    def profile(self, judgements=None, deleted=False):
        """Query the items of this set, optionally filtered by judgement."""
        q = self.items(deleted=deleted)
        if judgements is not None:
            q = q.filter(EntitySetItem.judgement.in_(judgements))
        return q

    def merge(self, other, merged_by_id):
        """Merge two entity_sets into each other. The older one is
        retained. This tries to retain a state where there is only
        one judgement between a set and an entity.

        Returns the retained set.
        """
        if other.id == self.id:
            return self
        if other.created_at > self.created_at:
            return other.merge(self, merged_by_id)

        local_items = {i.entity_id: i for i in self.items()}
        for remote in other.items():
            local = local_items.get(remote.entity_id)
            if local is None:
                # Entity only known to the other set: move the item over.
                remote.entityset_id = self.id
                remote.updated_at = datetime.utcnow()
                db.session.add(remote)
                continue
            judgement = local.judgement + remote.judgement
            if judgement == local.judgement:
                # The local item already carries the combined judgement.
                remote.delete()
                continue

            origin = local.compared_to_entity_id or remote.compared_to_entity_id
            combined = EntitySetItem(
                entityset_id=self.id,
                entity_id=local.entity_id,
                collection_id=local.collection_id,
                added_by_id=merged_by_id,
                judgement=judgement,
                compared_to_entity_id=origin,
            )
            db.session.add(combined)
            local.delete()
            remote.delete()
        other.delete()
        self.updated_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()
        return self

    def update(self, data):
        """Apply ``data`` (label, type, summary, layout) and un-delete the set."""
        self.label = data.get("label", self.label)
        self.type = data.get("type", self.type)
        self.summary = data.get("summary", self.summary)
        self.layout = data.get("layout", self.layout)
        self.updated_at = datetime.utcnow()
        self.deleted_at = None
        db.session.add(self)

    def delete(self, deleted_at=None):
        """Soft-delete the set and its items and detach its mappings.

        ``deleted_at`` defaults to the current UTC time and is resolved
        before the bulk update, so that the items are never written with a
        ``None`` timestamp (which would leave them undeleted).
        """
        deleted_at = deleted_at or datetime.utcnow()
        pq = db.session.query(EntitySetItem)
        pq = pq.filter(EntitySetItem.entityset_id == self.id)
        pq = pq.filter(EntitySetItem.deleted_at == None)  # noqa
        pq.update({EntitySetItem.deleted_at: deleted_at},
                  synchronize_session=False)

        for mapping in self.mappings:
            mapping.entityset_id = None
            db.session.add(mapping)

        self.deleted_at = deleted_at
        db.session.add(self)

    def to_dict(self):
        """Serialise the set, with ids rendered through ``stringify``."""
        data = self.to_dict_dates()
        data.update({
            "id": stringify(self.id),
            "type": self.type,
            "label": self.label,
            "summary": self.summary,
            "layout": self.layout,
            "role_id": stringify(self.role_id),
            "collection_id": stringify(self.collection_id),
        })
        return data

    def __repr__(self):
        return "<EntitySet(%r, %r)>" % (self.id, self.collection_id)
Пример #25
0
class Notification(db.Model, IdModel, DatedModel):
    """An event published to a set of channels, optionally with an acting
    role and event parameters."""
    GLOBAL = 'Global'

    # Name of the event; use the ``event`` property to resolve it.
    _event = db.Column('event', db.String(255), nullable=False)
    channels = db.Column(ARRAY(db.String(255)), index=True)
    params = db.Column(JSONB)

    actor_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    actor = db.relationship(Role)

    @hybrid_property
    def event(self):
        """The event object looked up in ``Events`` by the stored name."""
        return Events.get(self._event)

    @event.setter
    def event(self, event):
        self._event = event.name

    def iterparams(self):
        """Yield ``(name, class, value)`` for the actor and each set parameter.

        Only parameters declared by the event and holding a non-``None``
        value are yielded; nothing beyond the actor is yielded when the
        event cannot be resolved.
        """
        if self.actor_id is not None:
            yield 'actor', Role, self.actor_id
        if self.event is None:
            return
        # Tolerate rows whose params column is NULL.
        params = self.params or {}
        for name, clazz in self.event.params.items():
            value = params.get(name)
            if value is not None:
                yield name, clazz, value

    def to_dict(self):
        """Serialise the notification including its dates."""
        data = self.to_dict_dates()
        data.update({
            'id': self.id,
            'actor_id': self.actor_id,
            'event': self._event,
            'params': self.params
        })
        return data

    @classmethod
    def publish(cls, event, actor_id=None, channels=None, params=None):
        """Create a notification and add it to the session.

        Args:
            event: event object; its ``name`` is stored.
            actor_id: id of the acting role, if any.
            channels: iterable of channel names; ``None`` entries and
                duplicates are dropped. Defaults to no channels.
            params: dict of event parameters. Defaults to a fresh empty
                dict, so instances never share one mutable default object.

        Returns:
            The new notification (not yet flushed).
        """
        notf = cls()
        notf.event = event
        notf.actor_id = actor_id
        notf.params = {} if params is None else params
        channels = () if channels is None else channels
        notf.channels = list(set([c for c in channels if c is not None]))
        db.session.add(notf)
        return notf

    @classmethod
    def by_channels(cls, channels, role, since=None):
        """Query notifications on ``channels`` that are relevant to ``role``.

        Notifications triggered by the role itself and those with unknown
        events are excluded. Results are limited to those created at or
        after the later of ``since`` and ``role.notified_at`` (when set) and
        are ordered newest first.
        """
        channels = cast(channels, ARRAY(db.String(255)))
        q = cls.all()
        q = q.filter(cls.channels.overlap(channels))
        q = q.filter(cls._event.in_(Events.names()))
        q = q.filter(or_(
            cls.actor_id != role.id,
            cls.actor_id == None  # noqa
        ))
        since = since or role.notified_at
        if since is not None and role.notified_at is not None:
            since = max(since, role.notified_at)
        if since is not None:
            q = q.filter(cls.created_at >= since)
        q = q.order_by(cls.created_at.desc())
        return q

    @classmethod
    def delete_by_channel(cls, channel):
        """Delete every notification whose channels include ``channel``."""
        q = cls.all()
        q = q.filter(cls.channels.any(channel))
        q.delete(synchronize_session=False)
Пример #26
0
class Entity(db.Model, SoftDeleteModel):
    """A soft-deletable entity stored in a collection; its properties live
    in ``data``."""
    THING = 'Thing'
    LEGAL_ENTITY = 'LegalEntity'

    id = db.Column(db.String(ENTITY_ID_LEN),
                   primary_key=True,
                   default=make_textid,
                   nullable=False,
                   unique=False)
    name = db.Column(db.Unicode)
    schema = db.Column(db.String(255), index=True)
    foreign_id = db.Column(db.Unicode)
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    @property
    def model(self):
        """Schema object looked up from ``model`` by the stored schema name."""
        return model.get(self.schema)

    @property
    def signed_id(self):
        """The entity id signed with the namespace of its collection."""
        namespace = self.collection.ns
        return namespace.sign(self.id)

    def delete_matches(self):
        """Delete all ``Match`` rows that involve this entity on either side."""
        involved = or_(Match.entity_id == self.id, Match.match_id == self.id)
        db.session.query(Match).filter(involved).delete(
            synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Remove the entity's matches, then soft-delete it (default: now)."""
        self.delete_matches()
        if not deleted_at:
            deleted_at = datetime.utcnow()
        super(Entity, self).delete(deleted_at=deleted_at)

    def update(self, entity):
        """Validate ``entity`` and replace the stored schema and properties.

        Checksum properties are reset to the values stored so far.
        """
        proxy = model.get_proxy(entity)
        proxy.schema.validate(entity)
        self.schema = proxy.schema.name
        # Snapshot of the stored state, taken before ``data`` is replaced.
        stored = self.to_proxy()
        for prop in proxy.iterprops():
            # Do not allow the user to overwrite hashes because this could
            # lead to a user accessing random objects.
            if prop.type == registry.checksum:
                kept = stored.get(prop)
                proxy.set(prop, kept, cleaned=True, quiet=True)
        self.data = proxy.properties
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    def to_proxy(self):
        """Build an entity proxy from the stored columns, adding the name
        and the ``indexUpdatedAt`` timestamp."""
        payload = dict(id=self.id, schema=self.schema, properties=self.data)
        proxy = model.get_proxy(payload)
        proxy.add('name', self.name)
        proxy.set('indexUpdatedAt', self.updated_at)
        return proxy

    @classmethod
    def create(cls, data, collection):
        """Create an entity, or revive the one with the same foreign id in
        ``collection``, and apply ``data`` to it."""
        foreign_id = data.get('foreign_id')
        found = cls.by_foreign_id(foreign_id, collection.id, deleted=True)
        if found is None:
            found = cls()
            found.id = make_textid()
            found.collection = collection
            found.foreign_id = foreign_id
            found.data = {}
        found.deleted_at = None
        found.update(data)
        return found

    @classmethod
    def by_id(cls, entity_id, collection_id=None):
        """Fetch an entity by its (possibly signed) id.

        ``collection_id`` is accepted but not used for filtering.
        """
        entity_id, _ = Namespace.parse(entity_id)
        return cls.all().filter(cls.id == entity_id).first()

    @classmethod
    def by_foreign_id(cls, foreign_id, collection_id, deleted=False):
        """Find an entity by foreign id within a collection.

        Non-deleted rows sort first, then the most recently deleted ones.
        """
        if foreign_id is None:
            return None
        return (cls.all(deleted=deleted)
                .filter(Entity.collection_id == collection_id)
                .filter(cls.foreign_id == foreign_id)
                .order_by(Entity.deleted_at.desc().nullsfirst())
                .first())

    @classmethod
    def by_collection(cls, collection_id):
        """Query the entities of a collection."""
        in_collection = Entity.collection_id == collection_id
        return cls.all().filter(in_collection)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Delete the matches and soft-delete the entities of a collection."""
        if not deleted_at:
            deleted_at = datetime.utcnow()

        entity_ids = (db.session.query(cls.id)
                      .filter(cls.collection_id == collection_id)
                      .subquery())

        # Remove matches that point at these entities from either side.
        for column in (Match.entity_id, Match.match_id):
            stale = db.session.query(Match).filter(column.in_(entity_ids))
            stale.delete(synchronize_session=False)

        live = (db.session.query(cls)
                .filter(cls.collection_id == collection_id)
                .filter(cls.deleted_at == None))  # noqa
        live.update({cls.deleted_at: deleted_at}, synchronize_session=False)

    def __repr__(self):
        fields = (self.id, self.name)
        return '<Entity(%r, %r)>' % fields
Пример #27
0
class Collection(db.Model, IdModel, SoftDeleteModel):
    """A set of documents and entities against which access control is
    enforced."""

    # Category schema for collections.
    # TODO: add extra weight info.
    # TODO: should this be configurable?
    CATEGORIES = {
        'news': lazy_gettext('News archives'),
        'leak': lazy_gettext('Leaks'),
        'land': lazy_gettext('Land registry'),
        'gazette': lazy_gettext('Gazettes'),
        'court': lazy_gettext('Court archives'),
        'company': lazy_gettext('Company registries'),
        'watchlist': lazy_gettext('Watchlists'),
        'investigation': lazy_gettext('Personal collections'),
        'sanctions': lazy_gettext('Sanctions lists'),
        'scrape': lazy_gettext('Scrapes'),
        'procurement': lazy_gettext('Procurement'),
        'grey': lazy_gettext('Grey literature'),
        'license': lazy_gettext('Licenses and concessions'),
        'regulatory': lazy_gettext('Regulatory filings'),
        'other': lazy_gettext('Other material')
    }

    # Category applied by update() when the data carries none.
    DEFAULT = 'other'

    label = db.Column(db.Unicode)
    summary = db.Column(db.Unicode, nullable=True)
    category = db.Column(db.Unicode, nullable=True)
    countries = db.Column(ARRAY(db.Unicode()), default=[])
    languages = db.Column(ARRAY(db.Unicode()), default=[])
    foreign_id = db.Column(db.Unicode, unique=True, nullable=False)
    publisher = db.Column(db.Unicode, nullable=True)
    publisher_url = db.Column(db.Unicode, nullable=True)
    info_url = db.Column(db.Unicode, nullable=True)
    data_url = db.Column(db.Unicode, nullable=True)

    # A casefile is a type of collection which is used to manage the state
    # of an investigation. Unlike normal collections, cases do not serve
    # as source material, but as a mechanism of analysis.
    casefile = db.Column(db.Boolean, default=False)

    creator_id = db.Column(db.Integer, db.ForeignKey('role.id'), nullable=True)
    creator = db.relationship(Role)

    def update(self, data, creator=None):
        """Apply ``data`` to the collection and flush it to the session.

        Text fields (label, summary, publisher and the URLs) keep their
        current value when the key is absent from ``data``. ``category``
        falls back to ``DEFAULT``, ``casefile`` to ``False`` and
        ``countries``/``languages`` to empty lists when not supplied.

        :param data: Mapping with the new collection metadata.
        :param creator: Role to record as creator. When ``None`` the role is
            looked up via ``Role.by_id`` from ``data['creator_id']``.
        """
        self.label = data.get('label', self.label)
        self.summary = data.get('summary', self.summary)
        self.publisher = data.get('publisher', self.publisher)
        self.publisher_url = data.get('publisher_url', self.publisher_url)
        self.info_url = data.get('info_url', self.info_url)
        self.data_url = data.get('data_url', self.data_url)
        self.category = data.get('category') or self.DEFAULT
        self.casefile = as_bool(data.get('casefile'), default=False)
        self.countries = data.get('countries', [])
        self.languages = data.get('languages', [])
        if creator is None:
            creator = Role.by_id(data.get('creator_id'))
        self.creator = creator
        self.updated_at = datetime.utcnow()
        db.session.add(self)
        # Flush before granting permissions on this collection.
        # NOTE(review): presumably so that self.id is assigned -- confirm.
        db.session.flush()
        if creator is not None:
            # NOTE(review): the two True flags are presumably read and write
            # access -- confirm against Permission.grant.
            Permission.grant(self, creator, True, True)

    @property
    def roles(self):
        """IDs of roles holding a non-deleted read permission on this
        collection. The list is computed once and cached on the instance."""
        if not hasattr(self, '_roles'):
            q = db.session.query(Permission.role_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.collection_id == self.id)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            self._roles = [e.role_id for e in q.all()]
        return self._roles

    @property
    def kind(self):
        """``'casefile'`` when the casefile flag is set, else ``'source'``."""
        return 'casefile' if self.casefile else 'source'

    @classmethod
    def by_foreign_id(cls, foreign_id, deleted=False):
        """Return the collection with the given foreign ID, or ``None``.

        :param foreign_id: Foreign ID to look up; ``None`` yields ``None``.
        :param deleted: Forwarded to ``cls.all`` to control whether
            soft-deleted collections are considered.
        """
        if foreign_id is None:
            return
        q = cls.all(deleted=deleted)
        return q.filter(cls.foreign_id == foreign_id).first()

    @classmethod
    def all_by_ids(cls, ids, deleted=False, authz=None):
        """Query collections by ID, optionally restricted by authorization.

        :param ids: Collection IDs, forwarded to the parent ``all_by_ids``.
        :param deleted: Forwarded to the parent ``all_by_ids``.
        :param authz: Optional authorization object. Unless it is an admin,
            results are limited to collections for which one of
            ``authz.roles`` has a non-deleted read permission.
        :returns: A query object.
        """
        q = super(Collection, cls).all_by_ids(ids, deleted=deleted)
        if authz is not None and not authz.is_admin:
            q = q.join(Permission, cls.id == Permission.collection_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            q = q.filter(Permission.role_id.in_(authz.roles))
        return q

    @classmethod
    def create(cls, data, role=None):
        """Create a collection, or revive one with the same foreign ID.

        :param data: Mapping with collection metadata. A missing or empty
            ``foreign_id`` is replaced by a generated one.
        :param role: Optional role recorded as the creator.
        :returns: The new or revived collection with ``deleted_at`` cleared.
        """
        foreign_id = data.get('foreign_id') or make_textid()
        # Soft-deleted collections are included so they can be re-used.
        collection = cls.by_foreign_id(foreign_id, deleted=True)
        if collection is None:
            collection = cls()
            collection.foreign_id = foreign_id
        collection.update(data, creator=role)
        collection.deleted_at = None
        return collection

    def __repr__(self):
        return '<Collection(%r, %r, %r)>' % \
            (self.id, self.foreign_id, self.label)
Пример #28
0
class Role(db.Model, IdModel, SoftDeleteModel):
    """A user, group or other access control subject."""
    __tablename__ = 'role'

    # Role types stored in the ``type`` column.
    # NOTE(review): the USER literal looks masked in this copy of the file;
    # confirm the intended value before relying on it.
    USER = '******'
    GROUP = 'group'
    SYSTEM = 'system'
    TYPES = [USER, GROUP, SYSTEM]

    # Foreign IDs of the roles resolved by public_roles().
    # NOTE(review): SYSTEM_USER also looks masked -- confirm.
    SYSTEM_GUEST = 'guest'
    SYSTEM_USER = '******'

    #: Generates URL-safe signatures for invitations.
    SIGNATURE = URLSafeTimedSerializer(settings.SECRET_KEY)

    #: Signature maximum age, defaults to 1 day
    SIGNATURE_MAX_AGE = 60 * 60 * 24

    foreign_id = db.Column(db.Unicode(2048), nullable=False, unique=True)
    name = db.Column(db.Unicode, nullable=False)
    email = db.Column(db.Unicode, nullable=True)
    type = db.Column(db.Enum(*TYPES, name='role_type'), nullable=False)
    api_key = db.Column(db.Unicode, nullable=True)
    is_admin = db.Column(db.Boolean, nullable=False, default=False)
    is_muted = db.Column(db.Boolean, nullable=False, default=False)
    is_tester = db.Column(db.Boolean, nullable=False, default=False)
    is_blocked = db.Column(db.Boolean, nullable=False, default=False)
    password_digest = db.Column(db.Unicode, nullable=True)
    # Plain attribute, not a column: only the digest is persisted.
    password = None
    reset_token = db.Column(db.Unicode, nullable=True)
    locale = db.Column(db.Unicode, nullable=True)

    permissions = db.relationship('Permission', backref='role')

    @property
    def has_password(self):
        """Whether a password digest is stored for this role."""
        return self.password_digest is not None

    @property
    def is_public(self):
        """Whether this role is one of the roles from ``public_roles``."""
        return self.id in self.public_roles()

    @property
    def is_alertable(self):
        """Whether the role has an email address and is not muted."""
        if self.email is None:
            return False
        if self.is_muted is True:
            return False
        # TODO: ignore people that have not logged in for a certain time?
        return True

    @property
    def label(self):
        """Display label built by ``anonymize_email`` from name and email."""
        return anonymize_email(self.name, self.email)

    def update(self, data):
        """Apply user-editable settings from ``data``.

        Name, mute flag, tester flag and locale keep their current value
        when absent from ``data``; a non-empty ``password`` is hashed and
        stored.

        :param data: Mapping with the new settings.
        """
        self.name = data.get('name', self.name)
        self.is_muted = data.get('is_muted', self.is_muted)
        self.is_tester = data.get('is_tester', self.is_tester)
        if data.get('password'):
            self.set_password(data.get('password'))
        self.locale = stringify(data.get('locale', self.locale))
        self.updated_at = datetime.utcnow()

    def clear_roles(self):
        """Removes any existing roles from group membership."""
        self.roles = []
        self.updated_at = datetime.utcnow()
        db.session.add(self)
        db.session.flush()

    def add_role(self, role):
        """Adds an existing role as a membership of a group."""
        self.roles.append(role)
        db.session.add(role)
        db.session.add(self)
        self.updated_at = datetime.utcnow()

    def to_dict(self):
        """Serialize the role, including its API key, to a dictionary."""
        data = self.to_dict_dates()
        data.update({
            'id': stringify(self.id),
            'type': self.type,
            'name': self.name,
            'label': self.label,
            'email': self.email,
            'locale': self.locale,
            'api_key': self.api_key,
            'is_admin': self.is_admin,
            'is_muted': self.is_muted,
            'is_tester': self.is_tester,
            'has_password': self.has_password,
            # 'notified_at': self.notified_at
        })
        return data

    @classmethod
    def by_foreign_id(cls, foreign_id):
        """Return the role with the given foreign ID, or ``None``."""
        if foreign_id is not None:
            return cls.all().filter_by(foreign_id=foreign_id).first()

    @classmethod
    def by_email(cls, email):
        """Return the first role whose email matches case-insensitively."""
        if email is None:
            return None
        q = cls.all()
        q = q.filter(func.lower(cls.email) == email.lower())
        return q.first()

    @classmethod
    def by_api_key(cls, api_key):
        """Return the role owning the given API key, or ``None``."""
        if api_key is not None:
            return cls.all().filter_by(api_key=api_key).first()

    @classmethod
    def load_or_create(cls, foreign_id, type, name, email=None, is_admin=None):
        """Fetch the role with ``foreign_id``, creating it if necessary.

        :param foreign_id: Foreign ID identifying the role.
        :param type: Role type, used only when the role is created.
        :param name: Display name for a new role; the email is used when the
            name is empty.
        :param email: When given, overwrites the stored email address.
        :param is_admin: When given, overwrites the admin flag.
        :returns: The flushed role.
        """
        role = cls.by_foreign_id(foreign_id)

        if role is None:
            role = cls()
            role.foreign_id = foreign_id
            role.name = name or email
            role.type = type
            role.is_admin = False
            role.is_muted = False
            role.is_tester = False
            role.is_blocked = False
            role.notified_at = datetime.utcnow()

        # Every role gets an API key the first time it passes through here.
        if role.api_key is None:
            role.api_key = make_textid()

        if email is not None:
            role.email = email

        if is_admin is not None:
            role.is_admin = is_admin

        # see: https://github.com/alephdata/aleph/issues/111
        # Addresses listed in settings.ADMINS are always promoted to admin.
        auto_admins = [a.lower() for a in settings.ADMINS]
        if email is not None and email.lower() in auto_admins:
            role.is_admin = True

        db.session.add(role)
        db.session.flush()
        return role

    @classmethod
    def load_cli_user(cls):
        """Load or create the admin role named by ``settings.SYSTEM_USER``."""
        return cls.load_or_create(foreign_id=settings.SYSTEM_USER,
                                  name='Aleph',
                                  type=cls.USER,
                                  is_admin=True)

    @classmethod
    def load_id(cls, foreign_id):
        """Load a role and return the ID."""
        # Resolved IDs are memoized on the settings object; misses are not
        # cached, so a role created later is still found.
        if not hasattr(settings, '_roles'):
            settings._roles = {}
        if foreign_id not in settings._roles:
            role_id = cls.all_ids().filter_by(foreign_id=foreign_id).first()
            if role_id is not None:
                settings._roles[foreign_id] = role_id[0]
        return settings._roles.get(foreign_id)

    @classmethod
    def public_roles(cls):
        """Roles which make a collection to be considered public."""
        return {
            cls.load_id(cls.SYSTEM_USER),
            cls.load_id(cls.SYSTEM_GUEST),
        }

    @classmethod
    def by_prefix(cls, prefix, exclude=None):
        """Query user roles matching a name fragment or an exact email.

        :param str prefix: Text to match. It is compared case-insensitively
            against the whole email address, and as a substring of the name
            after ``%`` and ``_`` have been replaced by spaces.
        :param exclude: Optional collection of role IDs to leave out of the
            result.
        :returns: A query over matching user roles, ordered by ID.
        """
        query = prefix.replace('%', ' ').replace('_', ' ')
        query = '%%%s%%' % query
        q = cls.all()
        q = q.filter(Role.type == Role.USER)
        if exclude is not None and len(exclude):
            q = q.filter(not_(Role.id.in_(exclude)))
        q = q.filter(
            or_(
                func.lower(cls.email) == prefix.lower(),
                cls.name.ilike(query)))
        q = q.order_by(Role.id.asc())
        return q

    @classmethod
    def all_groups(cls, authz):
        """Query group roles ordered by name, then foreign ID.

        Non-admin callers only see groups whose ID is in ``authz.roles``.
        """
        q = cls.all()
        q = q.filter(Role.type == Role.GROUP)
        q = q.order_by(Role.name.asc())
        q = q.order_by(Role.foreign_id.asc())
        if not authz.is_admin:
            q = q.filter(Role.id.in_(authz.roles))
        return q

    @classmethod
    def all_users(cls):
        """Query all roles of type ``USER``."""
        return cls.all().filter(Role.type == Role.USER)

    @classmethod
    def all_system(cls):
        """Query all roles of type ``SYSTEM``."""
        return cls.all().filter(Role.type == Role.SYSTEM)

    def set_password(self, secret):
        """Hashes and sets the role password.

        :param str secret: The password to be set.
        """
        self.password_digest = generate_password_hash(secret)

    def check_password(self, secret):
        """Checks the password if it matches the role password hash.

        :param str secret: The password to be checked.
        :rtype: bool
        """
        # A role without a digest is checked against the empty string.
        digest = self.password_digest or ''
        return check_password_hash(digest, secret)

    def __repr__(self):
        return '<Role(%r,%r)>' % (self.id, self.foreign_id)
Пример #29
0
class Entity(db.Model, UuidModel, SoftDeleteModel):
    """A named entity belonging to a collection, with its properties kept
    in the JSONB ``data`` column."""
    THING = 'Thing'

    name = db.Column(db.Unicode)
    schema = db.Column(db.String(255), index=True)
    foreign_ids = db.Column(ARRAY(db.Unicode()))
    data = db.Column('data', JSONB)

    collection_id = db.Column(db.Integer,
                              db.ForeignKey('collection.id'),
                              index=True)  # noqa
    collection = db.relationship(Collection,
                                 backref=db.backref('entities',
                                                    lazy='dynamic'))  # noqa

    @property
    def model(self):
        """Schema object looked up from the module-level ``model`` registry
        by this entity's ``schema`` name."""
        return model.get(self.schema)

    @property
    def terms(self):
        """Set containing the entity name plus all non-empty aliases."""
        terms = set([self.name])
        for alias in ensure_list(self.data.get('alias')):
            if alias is not None and len(alias):
                terms.add(alias)
        return terms

    @property
    def regex_terms(self):
        """Minimal set of normalized terms to search for this entity."""
        # This is to find the shortest possible regex for each entity.
        # If, for example, an entity matches both "Al Qaeda" and
        # "Al Qaeda in Iraq, Syria and the Levant", it is useless to
        # search for the latter.
        terms = set([match_form(t) for t in self.terms])
        regex_terms = set()
        for term in terms:
            if term is None or len(term) < 4 or len(term) > 120:
                continue
            # Skip any term that already contains another (shorter) term.
            contained = any(other is not None and
                            other != term and
                            other in term
                            for other in terms)
            if not contained:
                regex_terms.add(term)
        return regex_terms

    def delete_matches(self):
        """Delete every ``Match`` row that references this entity on either
        side, then refresh the instance from the database."""
        pq = db.session.query(Match)
        pq = pq.filter(
            or_(Match.entity_id == self.id, Match.match_id == self.id))
        pq.delete(synchronize_session=False)
        db.session.refresh(self)

    def delete(self, deleted_at=None):
        """Soft-delete the entity together with its alerts and matches.

        :param deleted_at: Timestamp to record; defaults to the current UTC
            time.
        """
        self.delete_matches()
        deleted_at = deleted_at or datetime.utcnow()
        for alert in self.alerts:
            alert.delete(deleted_at=deleted_at)
        super(Entity, self).delete(deleted_at=deleted_at)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Soft-delete all entities of a collection in bulk.

        Alerts on those entities are soft-deleted, ``Match`` rows that
        reference them are removed, and finally the not-yet-deleted entities
        get their ``deleted_at`` stamped.

        :param collection_id: ID of the collection to clear.
        :param deleted_at: Timestamp to record; defaults to the current UTC
            time.
        """
        from aleph.model import Alert
        deleted_at = deleted_at or datetime.utcnow()

        # IDs of every entity in the collection, used as an IN (...) subquery.
        entities = db.session.query(cls.id)
        entities = entities.filter(cls.collection_id == collection_id)
        entities = entities.subquery()

        pq = db.session.query(Alert)
        pq = pq.filter(Alert.entity_id.in_(entities))
        pq.update({Alert.deleted_at: deleted_at}, synchronize_session=False)

        pq = db.session.query(Match)
        pq = pq.filter(Match.entity_id.in_(entities))
        pq.delete(synchronize_session=False)

        pq = db.session.query(Match)
        pq = pq.filter(Match.match_id.in_(entities))
        pq.delete(synchronize_session=False)

        pq = db.session.query(cls)
        pq = pq.filter(cls.collection_id == collection_id)
        pq = pq.filter(cls.deleted_at == None)  # noqa
        pq.update({cls.deleted_at: deleted_at}, synchronize_session=False)

    def merge(self, other):
        """Merge ``other`` into this entity and delete ``other``.

        Schema, foreign IDs, creation time and data of both entities are
        combined on ``self``; a differing name of ``other`` is kept as an
        alias. Alerts pointing at ``other`` are moved to ``self``. The
        session is committed.

        :param other: Entity from the same collection to merge in.
        :raises ValueError: If ``other`` is this entity or belongs to a
            different collection.
        """
        if self.id == other.id:
            raise ValueError("Cannot merge an entity with itself.")
        if self.collection_id != other.collection_id:
            raise ValueError(
                "Cannot merge entities from different collections.")  # noqa

        self.schema = model.precise_schema(self.schema, other.schema)
        # Combine both sides so the foreign IDs of ``other`` are not lost.
        self.foreign_ids = string_set(self.foreign_ids, other.foreign_ids)
        self.created_at = min((self.created_at, other.created_at))
        self.updated_at = datetime.utcnow()

        data = merge_data(self.data, other.data)
        if self.name != other.name:
            data = merge_data(data, {'alias': [other.name]})
        self.data = data

        # update alerts
        from aleph.model.alert import Alert
        q = db.session.query(Alert).filter(Alert.entity_id == other.id)
        q.update({Alert.entity_id: self.id})

        # delete source entities
        other.delete()
        db.session.add(self)
        db.session.commit()
        db.session.refresh(other)

    def update(self, entity):
        """Apply schema, name and properties from the ``entity`` mapping.

        When ``properties`` is a mapping it is validated through the schema
        model and replaces ``data``; otherwise existing data is kept. The
        name is stored in the ``name`` column, not inside ``data``.

        :param entity: Mapping with ``schema``, ``name`` and ``properties``.
        """
        self.schema = entity.get('schema')

        data = entity.get('properties')
        if is_mapping(data):
            # The name takes part in validation as a regular property.
            data['name'] = [entity.get('name')]
            self.data = self.model.validate(data)
        elif self.data is None:
            self.data = {}

        self.data.pop('name', None)
        self.name = entity.get('name')

        # TODO: should this be mutable?
        # self.foreign_ids = string_set(entity.get('foreign_ids'))
        self.updated_at = datetime.utcnow()
        db.session.add(self)

    @classmethod
    def create(cls, data, collection):
        """Create an entity in ``collection`` or revive a matching one.

        An entity in the collection whose foreign IDs contain all of the
        given ones is re-used (soft-deleted ones included); otherwise a new
        entity with a generated ID is set up.

        :param data: Mapping describing the entity; see ``update``.
        :param collection: Collection the entity belongs to.
        :returns: The new or revived entity with ``deleted_at`` cleared.
        """
        foreign_ids = string_set(data.get('foreign_ids'))
        ent = cls.by_foreign_ids(foreign_ids, collection.id, deleted=True)
        if ent is None:
            ent = cls()
            ent.id = make_textid()
            ent.collection = collection
            ent.foreign_ids = foreign_ids
        ent.update(data)
        ent.deleted_at = None
        return ent

    @classmethod
    def by_foreign_ids(cls, foreign_ids, collection_id, deleted=False):
        """Find an entity whose foreign IDs contain all given ones.

        :param foreign_ids: Sized collection of foreign IDs; an empty one
            yields ``None``.
        :param collection_id: ID of the collection to search in.
        :param deleted: Forwarded to ``cls.all`` to control whether
            soft-deleted rows are considered.
        :returns: The first match, or ``None``.
        """
        if not len(foreign_ids):
            return None
        q = cls.all(deleted=deleted)
        q = q.filter(Entity.collection_id == collection_id)
        foreign_id = func.cast(foreign_ids, ARRAY(db.Unicode()))
        q = q.filter(cls.foreign_ids.contains(foreign_id))
        # Rows without a deletion timestamp sort first.
        q = q.order_by(Entity.deleted_at.desc().nullsfirst())
        return q.first()

    @classmethod
    def all_ids(cls, deleted=False, authz=None):
        """Query entity IDs, optionally restricted by authorization.

        :param deleted: Forwarded to the parent ``all_ids``.
        :param authz: Optional authorization object. Unless it is an admin,
            results are limited to entities in collections for which one of
            ``authz.roles`` has a non-deleted read permission.
        :returns: A query object.
        """
        q = super(Entity, cls).all_ids(deleted=deleted)
        if authz is not None and not authz.is_admin:
            q = q.join(Permission,
                       cls.collection_id == Permission.collection_id)
            q = q.filter(Permission.deleted_at == None)  # noqa
            q = q.filter(Permission.read == True)  # noqa
            q = q.filter(Permission.role_id.in_(authz.roles))
        return q

    @classmethod
    def latest(cls):
        """Return the newest ``updated_at`` among non-deleted entities."""
        q = db.session.query(func.max(cls.updated_at))
        q = q.filter(cls.deleted_at == None)  # noqa
        return q.scalar()

    def __repr__(self):
        return '<Entity(%r, %r)>' % (self.id, self.name)
Пример #30
0
class Mapping(db.Model, SoftDeleteModel):
    """Stored description of how to load entities from a table."""
    __tablename__ = 'mapping'

    FAILED = 'failed'
    SUCCESS = 'success'
    # Translatable labels for the values of ``last_run_status``.
    STATUS = {
        SUCCESS: lazy_gettext('success'),
        FAILED: lazy_gettext('failed'),
    }

    id = db.Column(db.Integer, primary_key=True)
    query = db.Column('query', JSONB)

    role_id = db.Column(db.Integer, db.ForeignKey('role.id'), index=True)
    role = db.relationship(
        Role,
        backref=db.backref('mappings', lazy='dynamic'))

    collection_id = db.Column(
        db.Integer,
        db.ForeignKey('collection.id'),
        index=True)
    collection = db.relationship(
        Collection,
        backref=db.backref('mappings', lazy='dynamic'))

    table_id = db.Column(db.String(ENTITY_ID_LEN), index=True)

    # Outcome of the most recent run, written by set_status().
    last_run_status = db.Column(db.Unicode, nullable=True)
    last_run_err_msg = db.Column(db.Unicode, nullable=True)

    def update(self, query=None, table_id=None):
        """Touch the mapping, optionally replacing query and table ID.

        Falsy arguments leave the stored value as it is. The change is
        committed immediately.
        """
        self.updated_at = datetime.utcnow()
        if query:
            self.query = query
        if table_id:
            self.table_id = table_id
        session = db.session
        session.add(self)
        session.commit()

    def set_status(self, status, error=None):
        """Record the status and error message of the last run and commit."""
        self.last_run_status, self.last_run_err_msg = status, error
        session = db.session
        session.add(self)
        session.commit()

    def delete(self, deleted_at=None):
        """Soft-delete the mapping and commit.

        :param deleted_at: Timestamp to record; defaults to the current UTC
            time.
        """
        when = deleted_at or datetime.utcnow()
        self.deleted_at = when
        session = db.session
        session.add(self)
        session.commit()

    def to_dict(self):
        """Serialize the mapping, with the status as its translated label."""
        payload = self.to_dict_dates()
        status_label = self.STATUS.get(self.last_run_status)
        payload.update(
            id=stringify(self.id),
            query=dict(self.query),
            role_id=stringify(self.role_id),
            collection_id=stringify(self.collection_id),
            table_id=self.table_id,
            last_run_status=status_label,
            last_run_err_msg=self.last_run_err_msg,
        )
        return payload

    @classmethod
    def by_collection(cls, collection_id, table_id=None):
        """Query the mappings of a collection, optionally for one table."""
        found = cls.all().filter(cls.collection_id == collection_id)
        if table_id is None:
            return found
        return found.filter(cls.table_id == table_id)

    @classmethod
    def delete_by_collection(cls, collection_id, deleted_at=None):
        """Soft-delete every not-yet-deleted mapping of a collection.

        :param collection_id: ID of the collection to clear.
        :param deleted_at: Timestamp to record; defaults to the current UTC
            time.
        """
        when = deleted_at or datetime.utcnow()
        live = db.session.query(cls) \
            .filter(cls.collection_id == collection_id) \
            .filter(cls.deleted_at == None)  # noqa
        live.update({cls.deleted_at: when}, synchronize_session=False)

    @classmethod
    def create(cls, query, table_id, collection, role_id):
        """Build a mapping for ``collection`` and persist it via ``update``.

        :param query: Mapping query to store.
        :param table_id: ID of the table the mapping reads from.
        :param collection: Collection the mapping belongs to.
        :param role_id: ID of the role recorded on the mapping.
        :returns: The committed mapping.
        """
        created = cls()
        created.role_id = role_id
        created.query = query
        created.collection_id = collection.id
        created.table_id = table_id
        # update() stamps updated_at and commits.
        created.update()
        return created

    def __repr__(self):
        shown = (self.id, self.table_id)
        return '<Mapping(%r, %r)>' % shown
Пример #31
0
Файл: role.py Проект: pudo/aleph
    def all_users(cls, has_email=False):
        """Query all roles of type ``USER``.

        :param has_email: When true, only roles with an email address set
            are returned.
        """
        users = cls.all().filter(Role.type == Role.USER)
        if not has_email:
            return users
        return users.filter(Role.email != None)  # noqa

    def set_password(self, secret):
        """Store the hash of ``secret`` as this role's password digest.

        :param str secret: The password to be set.
        """
        digest = generate_password_hash(secret)
        self.password_digest = digest

    def check_password(self, secret):
        """Tell whether ``secret`` matches the stored password digest.

        :param str secret: The password to be checked.
        :rtype: bool
        """
        # A role without a digest is checked against the empty string.
        stored = self.password_digest or ''
        return check_password_hash(stored, secret)

    def __repr__(self):
        """Return a debugging representation with ID and foreign ID."""
        shown = (self.id, self.foreign_id)
        return '<Role(%r,%r)>' % shown


# Self-referential many-to-many through the ``membership`` table:
# ``members`` lists the roles that belong to a group, and the ``roles``
# backref lists the groups a role belongs to.
Role.members = db.relationship(
    Role,
    secondary=membership,
    primaryjoin=Role.id == membership.c.group_id,
    secondaryjoin=Role.id == membership.c.member_id,
    backref="roles",
)