Example #1
0
def ts_locs_array(
    config: ColumnElement,
    text: ColumnElement,
    tsquery: ColumnElement,
) -> ColumnElement:
    """Build a SQL expression yielding [position, length] pairs for every
    tsquery match inside *text*.

    ts_headline() wraps each match between TS_START and TS_STOP; the result
    is split on TS_START, and for every fragment after the first the running
    offset of the preceding fragments gives the match position while the
    distance to TS_STOP gives the match length.
    """
    headline_opts = f"HighlightAll = TRUE, StartSel = {TS_START}, StopSel = {TS_STOP}"
    highlighted = func.ts_headline(config, text, tsquery, headline_opts)
    fragments = func.unnest(func.string_to_array(highlighted, TS_START)).alias()
    fragment = column(fragments.name)
    # Fragment length with the stop-marker length discounted.
    fragment_len = func.length(fragment) - len(TS_STOP)
    # Window sum over all preceding rows -> offset where this match starts.
    match_pos = func.sum(fragment_len).over(rows=(None, -1)) + len(TS_STOP)
    match_len = func.strpos(fragment, TS_STOP) - 1
    locations = (
        select([postgresql.array([match_pos, match_len])])
        .select_from(fragments)
        .offset(1)
        .as_scalar()
    )
    return func.array(locations)
Example #2
0
def _strip_path_prefix(path: str, prefix: str) -> str:
    """Return *path* with a leading *prefix* removed, or unchanged if absent.

    Equivalent to ``str.removeprefix`` (3.9+), written out for compatibility.
    """
    if prefix and path.startswith(prefix):
        return path[len(prefix):]
    return path


def handle_bucket_event(
    db: Session,
    ec: elasticsearch.Elasticsearch,
    event: indexing_schemas.BucketEventNotification,
):
    """Index S3 bucket notifications into elasticsearch.

    For each record in *event*: locate the root index for the bucket/key,
    the acting user, and the owning workspace, then append an upsert
    (object created) or a delete (object removed) to a newline-delimited
    bulk payload that is sent to elasticsearch in a single call.

    Raises:
        ValueError: if no index or workspace matches the object key, or the
            record's event type is unsupported.
    """
    bulk_operations = ""
    for record in event.Records:
        object_key = urllib.parse.unquote(record.s3.object.key)
        # Find the root index for this bucket/key.
        # NOTE(review): PostgreSQL strpos() is 1-based, so `== 0` means
        # "object_key NOT found inside base_path" (and is also true when
        # base_path is empty) -- confirm this is the intended prefix test.
        parent_index: indexing_models.RootIndex = (
            db.query(indexing_models.RootIndex)
            .join(models.WorkspaceRoot)
            .filter(
                and_(
                    func.strpos(models.WorkspaceRoot.base_path, object_key) == 0,
                    models.WorkspaceRoot.bucket == record.s3.bucket.name,
                )
            )
            .first()
        )
        if parent_index is None:
            raise ValueError(f"no index for object {object_key}")
        # Find the user who caused this event, via their S3 access key.
        # TODO(review): actor_db is never used below -- either attribute the
        # indexed document to this user or drop the query.
        actor_db: Union[models.User, None] = (
            db.query(models.User)
            .join(models.S3Token)
            .filter(models.S3Token.access_key_id == record.userIdentity.principalId)
            .first()
        )
        # Find the workspace
        workspace: Optional[models.Workspace] = None
        workspace_prefix = ""
        workspace_inner_path = ""
        if parent_index.root.root_type in [
            schemas.RootType.PRIVATE,
            # Fixed: the original listed PRIVATE twice; PUBLIC roots use the
            # same {scope}/{user}/{workspace} layout handled by this branch.
            schemas.RootType.PUBLIC,
        ]:
            # Extrapolate path parts from root type, assume it's {scope}/{user}/{workspace}
            key_parts = object_key.split("/")
            scope = key_parts[0]
            user_name = key_parts[1]
            workspace_name = key_parts[2]
            workspace_inner_path = "/".join(key_parts[3:])
            workspace = (
                db.query(models.Workspace)
                .join(models.User)
                .filter(
                    and_(
                        models.Workspace.name == workspace_name,
                        models.User.username == user_name,
                    )
                )
                .first()
            )
            workspace_prefix = f"{scope}/{user_name}/{workspace_name}"

        elif parent_index.root.root_type == schemas.RootType.UNMANAGED:
            # Search again for matching base.
            # Fixed: str.lstrip() strips a *character set*, not a prefix, so
            # lstrip(base_path) could also eat the start of the inner path.
            workspace_inner_path = _strip_path_prefix(
                object_key, parent_index.root.base_path
            ).lstrip("/")
            # NOTE(review): as above, strpos(...) == 0 means "not contained";
            # verify this matches the intended base-path lookup.
            workspace = (
                db.query(models.Workspace)
                .filter(
                    func.strpos(models.Workspace.base_path, workspace_inner_path) == 0
                )
                .first()
            )
            if workspace is None:
                raise ValueError(f"No workspace found for object {object_key}")

            workspace_prefix = workspace.base_path
            workspace_inner_path = _strip_path_prefix(
                workspace_inner_path, workspace.base_path
            ).lstrip("/")

        if workspace is None:
            raise ValueError(f"No workspace found for object {object_key}")
        resource_owner: models.User = workspace.owner
        root: models.WorkspaceRoot = workspace.root
        node: models.StorageNode = root.storage_node

        # Deterministic document id so overwrites update the same record.
        primary_key_short_sha256 = make_record_primary_key(
            api_url=node.api_url,
            bucket=root.bucket,
            workspace_prefix=workspace_prefix,
            path=workspace_inner_path,
        )
        if record.eventName in [
            "s3:ObjectCreated:Put",
            "s3:ObjectCreated:Post",
            "s3:ObjectCreated:Copy",
        ]:
            # Create a new record in elasticsearch.
            # This could be an overwrite, so emit an upsert keyed on the
            # deterministic id rather than a plain create.
            doc = indexing_schemas.IndexDocument(
                time=record.eventTime,
                size=record.s3.object.size,
                eTag=record.s3.object.eTag,
                workspace_id=workspace.id,
                workspace_name=workspace.name,
                owner_id=resource_owner.id,
                owner_name=resource_owner.username,
                bucket=record.s3.bucket.name,
                server=node.api_url,
                root_path=root.base_path,
                root_id=root.id,
                path=workspace_inner_path,
                extension=posixpath.splitext(workspace_inner_path)[-1],
                user_shares=[share.sharee.id for share in workspace.shares],
                # TODO: group shares
            )
            bulk_operations += (
                json.dumps(
                    {
                        "update": {
                            "_index": parent_index.index_type,
                            "_id": primary_key_short_sha256,
                        }
                    },
                )
                + "\n"
            )
            bulk_operations += (
                indexing_schemas.ElasticUpsertIndexDocument(doc=doc).json() + "\n"
            )

        elif record.eventName in ["s3:ObjectRemoved:Delete"]:
            # Remove the existing record for this object.
            bulk_operations += (
                json.dumps(
                    {
                        "delete": {
                            "_index": parent_index.index_type,
                            "_id": primary_key_short_sha256,
                        }
                    }
                )
                + "\n"
            )
        else:
            raise ValueError(
                f"Bucket notification type unsupported: {record.eventName}"
            )
    # elasticsearch rejects an empty bulk body; skip when nothing was queued.
    if bulk_operations:
        ec.bulk(bulk_operations)