def create_concept_instance(
    db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict
):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = to_record(properties, body["values"])
        records = db.create_records_tx(
            tx, concept_id_or_name, [record], fill_missing=True
        )
        if not records:
            raise BadRequest(
                f"Could not create concept instance [{concept_id_or_name}]"
            )
        record = records[0]

        # Log the created concept instance:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "CreateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=CreateRecord(id=record.id, name=record.name, model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )
        AuditLogger.get().message().append("records", str(record.id)).log(
            x_bf_trace_id
        )

        return to_concept_instance(record, model, properties), 201

def get_all_concept_instances(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    limit: int,
    offset: int,
    order_by: Optional[str] = None,
    ascending: Optional[bool] = None,
) -> List[JsonDict]:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        # Only order the results if the caller asked for an ordering; when
        # only a direction is given, sort on "created_at":
        order_by_field = (
            None
            if order_by is None and ascending is None
            else OrderByField(
                name="created_at" if order_by is None else order_by,
                ascending=True if ascending is None else ascending,
            )
        )
        results = db.get_all_records_offset_tx(
            tx,
            model=model,
            limit=limit,
            offset=offset,
            fill_missing=True,
            order_by=order_by_field,
        )

        x_bf_trace_id = AuditLogger.trace_id_header()
        record_ids = []
        instances = []
        for record in results:
            record_ids.append(str(record.id))
            instances.append(to_concept_instance(record, model, properties))

        AuditLogger.get().message().append("records", *record_ids).log(x_bf_trace_id)

        return instances

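# A minimal usage sketch, assuming `db` is a PartitionedDatabase already
# scoped to the caller's organization and dataset; "patient" is a
# hypothetical model name:
#
#     instances = get_all_concept_instances(
#         db, "patient", limit=100, offset=0, order_by="name", ascending=True
#     )
#
# Leaving both `order_by` and `ascending` unset skips ordering entirely,
# while setting only `ascending` sorts on "created_at".
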
def publish_records_of_model(
    db: PartitionedDatabase, tx: Transaction, model: Model, config, s3
) -> FileManifest:
    """
    Export the records of a specific model.
    """
    log.info(f"Writing records for model '{model.name}'")

    output_file: OutputFile = OutputFile.csv_for_model(model.name).with_prefix(
        os.path.join(config.s3_publish_key, METADATA)
    )
    model_properties: List[ModelProperty] = db.get_properties_tx(tx, model)
    linked_properties: List[ModelRelationship] = sorted(
        db.get_outgoing_model_relationships_tx(tx, from_model=model, one_to_many=False),
        key=lambda r: r.index or sys.maxsize,
    )

    # Construct the header list for a model:
    headers: List[str] = record_headers(model_properties, linked_properties)

    with s3_csv_writer(s3, config.s3_bucket, str(output_file), headers) as writer:
        for r in db.get_all_records_offset_tx(
            tx=tx,
            model=model,
            embed_linked=True,
            fill_missing=True,
            limit=None,
        ):
            writer.writerow(record_row(r, model_properties, linked_properties))

    return output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, output_file)
    )

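# For orientation, a sketch of the CSV layout this produces, assuming
# `record_headers` emits one column per model property followed by one
# column per linked property (inferred from the calls above; the actual
# header names come from those helpers). "name", "age", and "doctor" are
# hypothetical property names:
#
#     name,age,doctor
#     "Alice",42,<linked-record-id>
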
def get_concept_instance(
    db: PartitionedDatabase, concept_id_or_name: str, concept_instance_id: str
) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.get_record_tx(tx, concept_instance_id, fill_missing=True)
        if record is None:
            raise NotFound(f"Could not get record {concept_instance_id}")
        return to_concept_instance(record, model, properties)

def delete_property(
    db: PartitionedDatabase,
    model_id: str,
    property_name: str,
    modify_records: bool = False,
) -> None:
    x_bf_trace_id = AuditLogger.trace_id_header()
    max_record_count = current_app.config["config"].max_record_count_for_property_deletion

    with db.transaction() as tx:
        model = db.get_model_tx(tx, model_id)

        if modify_records:
            record_count = db.model_property_record_count_tx(tx, model_id, property_name)
            if record_count > 0:
                if record_count > max_record_count:
                    raise BadRequest(
                        f"Cannot delete properties that are used on > {max_record_count} "
                        f"records. This property is used on {record_count} records."
                    )
                model_properties = [
                    p
                    for p in db.get_properties_tx(tx, model_id)
                    if p.name == property_name
                ]
                if not model_properties:
                    raise NotFound(f"no such property {property_name} exists")
                updated_records = db.delete_property_from_all_records_tx(
                    tx, model_id, model_properties[0]
                )
                if updated_records != record_count:
                    raise ServerError("the property was not removed from all records")

        deleted = db.delete_property_tx(tx, model_id, property_name)
        if deleted is None:
            raise NotFound(f"Could not delete property [{model_id}.{property_name}]")

        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteModelProperty(
                property_name=deleted.name,
                model_id=UUID(model.id),
                model_name=model.name,
            ),
            trace_id=TraceId(x_bf_trace_id),
        )

def get_records_related_to_package(
    db: PartitionedDatabase,
    proxy_type: str,
    package_id: str,
    concept_id_or_name: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:
        x_bf_trace_id = AuditLogger.trace_id_header()
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        results = []
        package_proxy_ids = []
        record_ids = []

        for pp, r in db.get_records_related_to_package_tx(
            tx=tx,
            package_id=PackageNodeId(package_id),
            related_model_id_or_name=concept_id_or_name,
            limit=limit,
            offset=offset,
            relationship_order_by=relationship_order_by,
            record_order_by=record_order_by,
            ascending=ascending,
        ):
            package_proxy_ids.append(str(pp.id))
            record_ids.append(str(r.id))
            t = (
                # All package-to-record relationships are defined with the
                # internal `@IN_PACKAGE` relationship type:
                #   (Package)<-[`@IN_PACKAGE`]-(Record)
                # For legacy consistency, we just use the generic "belongs_to"
                # type here:
                make_proxy_relationship_instance(r.id, pp, "belongs_to"),
                to_concept_instance(r, model, properties),
            )
            results.append(t)

        AuditLogger.get().message().append(
            "package-proxies", *package_proxy_ids
        ).append("records", *record_ids).log(x_bf_trace_id)

        return results

def get_related(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    target_concept_id_or_name: str,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: Optional[bool] = True,
    limit: int = 100,
    offset: int = 0,
    include_incoming_linked_properties: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, target_concept_id_or_name)
        properties = db.get_properties_tx(tx, target_concept_id_or_name)

        # Record ordering takes precedence over relationship ordering; if
        # neither is given, sort on "created_at" for backwards compatibility:
        asc = ascending if ascending is not None else True
        if record_order_by is not None:
            order_by: ModelOrderBy = ModelOrderBy.field(
                name=record_order_by, ascending=asc
            )
        elif relationship_order_by is not None:
            order_by = ModelOrderBy.relationship(
                type=relationship_order_by, ascending=asc
            )
        else:
            order_by = ModelOrderBy.field(name="created_at", ascending=True)

        related = db.get_related_records_tx(
            tx,
            start_from=id_,
            model_name=target_concept_id_or_name,
            order_by=order_by,
            limit=limit,
            offset=offset,
            include_incoming_linked_properties=include_incoming_linked_properties,
        )

        return [
            (
                to_legacy_relationship_instance(rr)
                if rr.one_to_many
                else to_linked_property(rr),
                to_concept_instance(r, model, properties),
            )
            for (rr, r) in related
        ]

def update_concept_instance(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    concept_instance_id: str,
    body: JsonDict,
) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.get_record_tx(
            tx,
            concept_instance_id,
            embed_linked=False,
            fill_missing=True,
        )
        if record is None:
            raise NotFound(f"Could not get record {concept_instance_id}")

        updated_record = db.update_record_tx(
            tx,
            concept_instance_id,
            to_record(properties, body["values"]),
            fill_missing=True,
        )

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit an "UpdateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=UpdateRecord(
                id=record.id,
                name=record.name,
                model_id=model.id,
                properties=UpdateRecord.compute_diff(
                    properties, record.values, updated_record.values
                ),
            ),
            trace_id=TraceId(x_bf_trace_id),
        )

        return to_concept_instance(updated_record, model, properties)

def delete_concept_instances(
    db: PartitionedDatabase, concept_id_or_name: str
) -> JsonDict:
    # HACK: request bodies on DELETE requests do not have defined
    # semantics and are not directly supported by OpenAPI/Connexion. See
    # - https://swagger.io/docs/specification/describing-request-body
    # - https://github.com/zalando/connexion/issues/896
    body = connexion.request.json

    success = []
    errors = []
    events = []

    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, model)

        for instance_id in body:
            try:
                deleted = db.delete_record_tx(tx, instance_id, properties)
                events.append(
                    DeleteRecord(
                        id=deleted.id,
                        name=deleted.name,
                        model_id=model.id,
                    )
                )
            except Exception as e:  # noqa: F841
                errors.append([instance_id, f"Could not delete {instance_id}"])
            else:
                success.append(instance_id)

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "DeleteRecord" event for each deleted record:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=events,
            trace_id=TraceId(x_bf_trace_id),
        )

        return {"success": success, "errors": errors}

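# A minimal client-side sketch of the DELETE-with-body call this handler
# expects (illustrative only; the URL and record ids are hypothetical, and
# the `requests` library is assumed to be available):
#
#     import requests
#
#     requests.delete(
#         "https://api.example.com/datasets/1/concepts/patient/instances",
#         json=["record-id-1", "record-id-2"],
#     )
#
# The JSON body is a bare list of record ids, which is why the handler reads
# `connexion.request.json` directly instead of a declared OpenAPI parameter.
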
def delete_concept_instance(
    db: PartitionedDatabase, concept_id_or_name: str, concept_instance_id: str
) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.delete_record_tx(tx, concept_instance_id, properties)

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "DeleteRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteRecord(id=record.id, name=record.name, model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )

        return to_concept_instance(record, model, properties)

def create_concept_instance_batch(
    db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict
):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        requests = [to_record(properties, req["values"]) for req in body]
        records = db.create_records_tx(
            tx, concept_id_or_name, requests, fill_missing=True
        )
        instances = [to_concept_instance(r, model, properties) for r in records]
        if not instances:
            raise BadRequest(
                f"Could not create concept instances for [{concept_id_or_name}]"
            )

        # Log the created concept instances:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" events:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=[
                CreateRecord(id=r.id, name=r.name, model_id=model.id)
                for r in records
            ],
            trace_id=TraceId(x_bf_trace_id),
        )
        AuditLogger.get().message().append(
            "records", *[str(r.id) for r in records]
        ).log(x_bf_trace_id)

        return instances

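# A sketch of the expected batch body, inferred from the `req["values"]`
# access above: a JSON list in which each element carries a "values" payload
# in the same shape accepted by `create_concept_instance` (the inner shape
# is determined by `to_record` and is not spelled out here):
#
#     [{"values": [...]}, {"values": [...]}]
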
def publish_schema(
    db: PartitionedDatabase,
    tx: Transaction,
    config: PublishConfig,
    s3,
    file_manifests: List[FileManifest],
    proxy_relationship_names: List[RelationshipName],
) -> Tuple[ExportGraphSchema, FileManifest]:
    """
    Export the schema of the partitioned database into a `GraphSchema`
    instance.
    """
    schema_models: List[ExportModel] = []
    schema_relationships: List[ExportModelRelationship] = []

    log.info("Exporting graph schema")

    models: List[Model] = db.get_models_tx(tx)
    model_index: Dict[UUID, Model] = {m.id: m for m in models}

    for m in models:
        log.info(f"Building schema for model '{m.name}'")
        properties: List[ModelProperty] = db.get_properties_tx(tx, m)
        linked_properties: List[ModelRelationship] = list(
            db.get_outgoing_model_relationships_tx(tx, from_model=m, one_to_many=False)
        )
        publish_properties: List[ExportProperty] = [
            ExportProperty.model_property(
                name=p.name,
                display_name=p.display_name,
                description=p.description,
                data_type=p.data_type,
            )
            for p in properties
        ] + [
            ExportProperty.linked_property(
                name=r.name,
                display_name=r.display_name,
                description=r.description,
                data_type=LinkedModelDataType(
                    to=model_index[r.to].name,
                    file=str(OutputFile.csv_for_model(m.name)),
                ),
            )
            for r in sorted(linked_properties, key=lambda lp: lp.index or sys.maxsize)
        ]
        model = ExportModel(
            model=m,
            name=m.name,
            display_name=m.display_name,
            description=m.description,
            properties=publish_properties,
        )
        schema_models.append(model)

    # If any packages exist in this dataset, add a special-cased "File" model:
    if len(file_manifests) > 0:
        log.info("Building schema for proxy package model")
        proxy_package_model = ExportModel.package_proxy()
        # TODO: gracefully handle this case to avoid overwriting "files.csv"
        assert not any(
            m.name == proxy_package_model.name for m in schema_models
        ), (
            f"Cannot export package proxy schema model with name "
            f"'{proxy_package_model.name}' - a model with that name already "
            f"exists. See https://app.clickup.com/t/102ndc for issue"
        )
        schema_models.append(proxy_package_model)

    relationships = db.get_outgoing_model_relationships_tx(tx, one_to_many=True)
    for r in relationships:
        log.info(f"Building schema for relationship '{r.name}'")
        relationship = ExportModelRelationship(
            relationship=r,
            name=r.name,
            from_=model_index[r.from_].name,
            to=model_index[r.to].name,
        )
        schema_relationships.append(relationship)

    for p in proxy_relationship_names:
        log.info(f"Building schema for proxy relationship '{p}'")
        relationship = ExportModelRelationship(
            relationship=None, name=p, from_="", to=""
        )
        schema_relationships.append(relationship)

    schema = ExportGraphSchema(
        models=schema_models, relationships=schema_relationships
    )

    # Write "schema.json" to S3
    # ======================================================================
    schema_output_file = OutputFile.json_for_schema().with_prefix(
        os.path.join(config.s3_publish_key, METADATA)
    )
    s3.put_object(
        Bucket=config.s3_bucket,
        Key=str(schema_output_file),
        Body=schema.to_json(camel_case=True, pretty_print=True, drop_nulls=True),
        RequestPayer="requester",
    )
    schema_manifest = schema_output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, schema_output_file)
    )
    return schema, schema_manifest
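
# For reference, a sketch of the "schema.json" shape this produces, assuming
# camel-cased serialization of the export types above (field names are
# inferred from the constructors; the actual serializers may differ).
# "patient", "visit", and "attends" are hypothetical example names:
#
#     {
#       "models": [
#         {
#           "name": "patient",
#           "displayName": "Patient",
#           "description": "",
#           "properties": [...]
#         }
#       ],
#       "relationships": [
#         {"name": "attends", "from": "patient", "to": "visit"}
#       ]
#     }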