Example #1
def create_concept_instance(db: PartitionedDatabase, concept_id_or_name: str,
                            body: JsonDict):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = to_record(properties, body["values"])
        records = db.create_records_tx(tx,
                                       concept_id_or_name, [record],
                                       fill_missing=True)

        if not records:
            raise BadRequest(
                f"Could not create concept instance [{concept_id_or_name}]")
        record = records[0]

        # Log the created concept instance:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=CreateRecord(id=record.id,
                               name=record.name,
                               model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append("records",
                                           str(record.id)).log(x_bf_trace_id)

        return to_concept_instance(record, model, properties), 201
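
The `to_record`/`to_concept_instance` helpers translate between the legacy
"concept instance" wire format and the record model, but their implementation
is not shown above. A minimal sketch of the request-side half, assuming the
legacy list-of-{"name", "value"} payload shape (names and validation policy
here are illustrative, not the service's actual code):

from dataclasses import dataclass
from typing import Any, Dict, List

@dataclass
class PropertySpec:
    # Stand-in for the service's ModelProperty definition
    name: str

def to_record_sketch(properties: List[PropertySpec],
                     values: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Pair the request's [{"name": ..., "value": ...}] entries with the
    model's property definitions, rejecting names the model lacks."""
    known = {p.name for p in properties}
    record: Dict[str, Any] = {}
    for entry in values:
        if entry["name"] not in known:
            raise ValueError(f"unknown property [{entry['name']}]")
        record[entry["name"]] = entry["value"]
    return record

# e.g. to_record_sketch([PropertySpec("age")], [{"name": "age", "value": 41}])
# returns {"age": 41}
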
Example #2
def get_all_concept_instances(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    limit: int,
    offset: int,
    order_by: Optional[str] = None,
    ascending: Optional[bool] = None,
) -> List[JsonDict]:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        # Skip ordering only when neither ordering parameter is given;
        # otherwise fall back to `created_at`, ascending, for missing parts:
        if order_by is None and ascending is None:
            order = None
        else:
            order = OrderByField(
                name=order_by if order_by is not None else "created_at",
                ascending=ascending if ascending is not None else True,
            )
        results = db.get_all_records_offset_tx(
            tx,
            model=model,
            limit=limit,
            offset=offset,
            fill_missing=True,
            order_by=order,
        )

        x_bf_trace_id = AuditLogger.trace_id_header()
        record_ids = []
        instances = []
        for record in results:
            record_ids.append(str(record.id))
            instances.append(to_concept_instance(record, model, properties))

        AuditLogger.get().message().append("records",
                                           *record_ids).log(x_bf_trace_id)

        return instances
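
Callers page through these results with `limit`/`offset`. A client-side
sketch of draining every page, under the assumption that a page shorter than
`page_size` signals the end of the result set (`fetch` stands in for
get_all_concept_instances with the db and model arguments pre-bound):

def iter_all_instances(fetch, page_size: int = 100):
    """Yield every concept instance by walking limit/offset pages."""
    offset = 0
    while True:
        page = fetch(limit=page_size, offset=offset)
        yield from page
        if len(page) < page_size:  # short page: nothing left to fetch
            break
        offset += page_size

# e.g. fetch = functools.partial(get_all_concept_instances, db, "patient")
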
Example #3
def publish_records_of_model(db: PartitionedDatabase, tx: Transaction,
                             model: Model, config, s3) -> FileManifest:
    """
    Export the records of a specific model.
    """
    log.info(f"Writing records for model '{model.name}'")

    output_file: OutputFile = OutputFile.csv_for_model(model.name).with_prefix(
        os.path.join(config.s3_publish_key, METADATA))

    model_properties: List[ModelProperty] = db.get_properties_tx(tx, model)

    linked_properties: List[ModelRelationship] = sorted(
        db.get_outgoing_model_relationships_tx(tx,
                                               from_model=model,
                                               one_to_many=False),
        key=lambda r: r.index or sys.maxsize,
    )

    # Construct the header list for a model:
    headers: List[str] = record_headers(model_properties, linked_properties)

    with s3_csv_writer(s3, config.s3_bucket, str(output_file),
                       headers) as writer:
        for r in db.get_all_records_offset_tx(
                tx=tx,
                model=model,
                embed_linked=True,
                fill_missing=True,
                limit=None,
        ):
            writer.writerow(record_row(r, model_properties, linked_properties))

    return output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, output_file))
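
`s3_csv_writer` is defined elsewhere in the codebase. A plausible minimal
implementation, assuming it buffers rows in memory and uploads the finished
object with a single boto3 `put_object` call on clean exit (the real helper
may stream a multipart upload instead):

import csv
import io
from contextlib import contextmanager

@contextmanager
def s3_csv_writer_sketch(s3, bucket: str, key: str, headers):
    """Yield a csv.writer; upload the buffered CSV to S3 on clean exit."""
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(headers)
    yield writer
    s3.put_object(
        Bucket=bucket,
        Key=key,
        Body=buffer.getvalue().encode("utf-8"),
        RequestPayer="requester",  # mirrors the put_object call in publish_schema
    )
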
Example #4
def get_concept_instance(db: PartitionedDatabase, concept_id_or_name: str,
                         concept_instance_id: str) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.get_record_tx(tx, concept_instance_id, fill_missing=True)
        if record is None:
            raise NotFound(f"Could not get record {concept_instance_id}")
        return to_concept_instance(record, model, properties)
Example #5
def delete_property(
    db: PartitionedDatabase,
    model_id: str,
    property_name: str,
    modify_records: bool = False,
) -> None:

    x_bf_trace_id = AuditLogger.trace_id_header()
    max_record_count = (
        current_app.config["config"].max_record_count_for_property_deletion
    )

    with db.transaction() as tx:
        model = db.get_model_tx(tx, model_id)

        if modify_records:
            record_count = db.model_property_record_count_tx(
                tx, model_id, property_name)
            if record_count > 0:
                if record_count > max_record_count:
                    raise BadRequest(
                        f"Cannot delete properties that are used on more than "
                        f"{max_record_count} records. This property is used on "
                        f"{record_count} records.")
                model_properties = [
                    p for p in db.get_properties_tx(tx, model_id)
                    if p.name == property_name
                ]
                if not model_properties:
                    raise NotFound(f"no such property {property_name} exists")
                updated_records = db.delete_property_from_all_records_tx(
                    tx, model_id, model_properties[0])
                if updated_records != record_count:
                    raise ServerError(
                        f"Expected to remove the property from {record_count} "
                        f"records, but removed it from {updated_records}")

        deleted = db.delete_property_tx(tx, model_id, property_name)
        if deleted is None:
            raise NotFound(
                f"Could not delete property [{model_id}.{property_name}]")

        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteModelProperty(
                property_name=deleted.name,
                model_id=UUID(model.id),
                model_name=model.name,
            ),
            trace_id=TraceId(x_bf_trace_id),
        )
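
The `modify_records` path strips stored values before dropping the property
definition, and the `updated_records != record_count` comparison guards
against a partial write. The same invariant in miniature, with plain dicts
standing in for records (illustrative only):

from typing import Any, Dict, List

def strip_property(records: List[Dict[str, Any]], name: str) -> int:
    """Remove `name` from every record that carries it; return the count."""
    updated = 0
    for record in records:
        if name in record:
            del record[name]
            updated += 1
    return updated

records = [{"age": 41}, {"age": 12}, {"height": 180}]
assert strip_property(records, "age") == 2  # must match the prior count
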
Example #6
def get_records_related_to_package(
    db: PartitionedDatabase,
    proxy_type: str,
    package_id: str,
    concept_id_or_name: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:

        x_bf_trace_id = AuditLogger.trace_id_header()
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        results = []
        package_proxy_ids = []
        record_ids = []

        for pp, r in db.get_records_related_to_package_tx(
                tx=tx,
                package_id=PackageNodeId(package_id),
                related_model_id_or_name=concept_id_or_name,
                limit=limit,
                offset=offset,
                relationship_order_by=relationship_order_by,
                record_order_by=record_order_by,
                ascending=ascending,
        ):
            package_proxy_ids.append(str(pp.id))
            record_ids.append(str(r.id))
            t = (
                # All package-to-record relationships are defined with the
                # internal `@IN_PACKAGE` relationship type:
                #   (Package)<-[`@IN_PACKAGE`]-(Record)
                # For legacy consistency, we just use the generic "belongs_to"
                # here:
                make_proxy_relationship_instance(r.id, pp, "belongs_to"),
                to_concept_instance(r, model, properties),
            )
            results.append(t)

        AuditLogger.get().message().append("package-proxies",
                                           *package_proxy_ids).append(
                                               "records",
                                               *record_ids).log(x_bf_trace_id)

        return results
Example #7
def get_related(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    target_concept_id_or_name: str,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: Optional[bool] = True,
    limit: int = 100,
    offset: int = 0,
    include_incoming_linked_properties: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:

        model = db.get_model_tx(tx, target_concept_id_or_name)
        properties = db.get_properties_tx(tx, target_concept_id_or_name)
        order_by: Optional[ModelOrderBy] = None
        asc = ascending if ascending is not None else True

        if record_order_by is not None:
            order_by = ModelOrderBy.field(name=record_order_by, ascending=asc)
        elif relationship_order_by is not None:
            order_by = ModelOrderBy.relationship(
                type=relationship_order_by, ascending=asc
            )
        else:
            order_by = ModelOrderBy.field(
                name="created_at", ascending=True
            )  # default order for backwards compatibility

        related = db.get_related_records_tx(
            tx,
            start_from=id_,
            model_name=target_concept_id_or_name,
            order_by=order_by,
            limit=limit,
            offset=offset,
            include_incoming_linked_properties=include_incoming_linked_properties,
        )
        return [
            (
                to_legacy_relationship_instance(rr)
                if rr.one_to_many
                else to_linked_property(rr),
                to_concept_instance(r, model, properties),
            )
            for (rr, r) in related
        ]
Example #8
def update_concept_instance(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    concept_instance_id: str,
    body: JsonDict,
) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.get_record_tx(
            tx,
            concept_instance_id,
            embed_linked=False,
            fill_missing=True,
        )
        if record is None:
            raise NotFound(f"Could not get record {concept_instance_id}")

        updated_record = db.update_record_tx(
            tx,
            concept_instance_id,
            to_record(properties, body["values"]),
            fill_missing=True,
        )

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "UpdateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=UpdateRecord(
                id=record.id,
                name=record.name,
                model_id=model.id,
                properties=UpdateRecord.compute_diff(properties, record.values,
                                                     updated_record.values),
            ),
            trace_id=TraceId(x_bf_trace_id),
        )

        return to_concept_instance(updated_record, model, properties)
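
`UpdateRecord.compute_diff` is not shown above; conceptually it reduces the
old and new value maps to just the properties that changed. A hedged sketch
of that reduction (the service's real event payload shape may differ):

from typing import Any, Dict, Tuple

def compute_diff_sketch(old: Dict[str, Any],
                        new: Dict[str, Any]) -> Dict[str, Tuple[Any, Any]]:
    """Map each changed property name to its (old, new) value pair."""
    keys = set(old) | set(new)
    return {k: (old.get(k), new.get(k))
            for k in keys
            if old.get(k) != new.get(k)}

assert compute_diff_sketch({"age": 41}, {"age": 42}) == {"age": (41, 42)}
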
Example #9
def delete_concept_instances(db: PartitionedDatabase,
                             concept_id_or_name: str) -> JsonDict:
    # HACK: request bodies on DELETE requests do not have defined
    # semantics and are not directly supported by OpenAPI/Connexion. See
    #  - https://swagger.io/docs/specification/describing-request-body
    #  - https://github.com/zalando/connexion/issues/896
    body = connexion.request.json

    success = []
    errors = []
    events = []

    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, model)

        for instance_id in body:
            try:
                deleted = db.delete_record_tx(tx, instance_id, properties)
                events.append(
                    DeleteRecord(
                        id=deleted.id,
                        name=deleted.name,
                        model_id=model.id,
                    ))
            except Exception:
                errors.append([instance_id, f"Could not delete {instance_id}"])
            else:
                success.append(instance_id)

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "DeleteRecord" event:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=events,
            trace_id=TraceId(x_bf_trace_id),
        )

        return {"success": success, "errors": errors}
Example #10
def delete_concept_instance(db: PartitionedDatabase, concept_id_or_name: str,
                            concept_instance_id: str) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.delete_record_tx(tx, concept_instance_id, properties)

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "DeleteRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteRecord(id=record.id,
                               name=record.name,
                               model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )

        return to_concept_instance(record, model, properties)
Example #11
def create_concept_instance_batch(db: PartitionedDatabase,
                                  concept_id_or_name: str, body: JsonDict):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        requests = [to_record(properties, req["values"]) for req in body]
        records = db.create_records_tx(tx,
                                       concept_id_or_name,
                                       requests,
                                       fill_missing=True)
        instances = [
            to_concept_instance(r, model, properties) for r in records
        ]
        if not instances:
            raise BadRequest(
                f"Could not create concept instances for [{concept_id_or_name}]"
            )

        # Log the created concept instances:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" events:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=[
                CreateRecord(id=r.id, name=r.name, model_id=model.id)
                for r in records
            ],
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append("records",
                                           *[str(r.id) for r in records
                                             ]).log(x_bf_trace_id)

        return instances
Example #12
def publish_schema(
    db: PartitionedDatabase,
    tx: Transaction,
    config: PublishConfig,
    s3,
    file_manifests: List[FileManifest],
    proxy_relationship_names: List[RelationshipName],
) -> Tuple[ExportGraphSchema, FileManifest]:
    """
    Export the schema of the partitioned database into a `GraphSchema`
    instance.
    """
    schema_models: List[ExportModel] = []
    schema_relationships: List[ExportModelRelationship] = []

    log.info("Exporting graph schema")

    models: List[Model] = db.get_models_tx(tx)
    model_index: Dict[UUID, Model] = {m.id: m for m in models}

    for m in models:
        log.info(f"Building schema for model '{m.name}'")
        properties: List[ModelProperty] = db.get_properties_tx(tx, m)
        linked_properties: List[ModelRelationship] = list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   from_model=m,
                                                   one_to_many=False))

        publish_properties: List[ExportProperty] = [
            ExportProperty.model_property(
                name=p.name,
                display_name=p.display_name,
                description=p.description,
                data_type=p.data_type,
            ) for p in properties
        ] + [
            ExportProperty.linked_property(
                name=r.name,
                display_name=r.display_name,
                description=r.description,
                data_type=LinkedModelDataType(
                    to=model_index[r.to].name,
                    file=str(OutputFile.csv_for_model(m.name)),
                ),
            ) for r in sorted(linked_properties,
                              key=lambda l: l.index or sys.maxsize)
        ]

        model = ExportModel(
            model=m,
            name=m.name,
            display_name=m.display_name,
            description=m.description,
            properties=publish_properties,
        )
        schema_models.append(model)

    # If any packages exist in this dataset, add a special-cased "File" model
    if len(file_manifests) > 0:
        log.info(f"Building schema for proxy package model")
        proxy_package_model = ExportModel.package_proxy()

        # TODO: gracefully handle this case to avoid overwriting "files.csv"
        assert not any(
            m.name == proxy_package_model.name for m in schema_models
        ), (f"Cannot export package proxy schema model with name "
            f"'{proxy_package_model.name}' - a model with that name "
            f"already exists. See https://app.clickup.com/t/102ndc for issue")
        schema_models.append(proxy_package_model)

    relationships = db.get_outgoing_model_relationships_tx(tx,
                                                           one_to_many=True)

    for r in relationships:
        log.info(f"Building schema for relationship '{r.name}'")
        relationship = ExportModelRelationship(
            relationship=r,
            name=r.name,
            from_=model_index[r.from_].name,
            to=model_index[r.to].name,
        )
        schema_relationships.append(relationship)

    for p in proxy_relationship_names:
        log.info(f"Building schema for proxy relationship '{p}'")
        relationship = ExportModelRelationship(relationship=None,
                                               name=p,
                                               from_="",
                                               to="")
        schema_relationships.append(relationship)

    schema = ExportGraphSchema(models=schema_models,
                               relationships=schema_relationships)

    # Write "schema.json" to S3
    # ======================================================================
    schema_output_file = OutputFile.json_for_schema().with_prefix(
        os.path.join(config.s3_publish_key, METADATA))
    s3.put_object(
        Bucket=config.s3_bucket,
        Key=str(schema_output_file),
        Body=schema.to_json(camel_case=True,
                            pretty_print=True,
                            drop_nulls=True),
        RequestPayer="requester",
    )
    schema_manifest = schema_output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, schema_output_file))

    return schema, schema_manifest
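
`size_of` is used by both publish functions to size the manifest entry for a
freshly written object. A minimal sketch, assuming it issues a HEAD request
(the `RequestPayer` flag mirrors the `put_object` call above):

def size_of_sketch(s3, bucket: str, key) -> int:
    """Return the object's size in bytes via a HEAD request."""
    head = s3.head_object(Bucket=bucket, Key=str(key),
                          RequestPayer="requester")
    return head["ContentLength"]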