def test_loader(neo4j):
    """
    Load the exported dataset `5/29233` from the `dev-neptune-export-use1`
    bucket and check that models, properties, records, model and record
    relationships, linked properties and package proxies come back with the
    expected IDs and audit fields.
    """
    dataset_id = 29233
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    # Every expected object below carries this user as creator and updater.
    creator = "N:user:06080380-fb56-46eb-8c70-f24112aff878"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Models
    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="0b4b3615-9eaf-425d-9727-bcac29686fd5",
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="e507b3ef-ade4-4672-83b4-f3f0774fb282",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "bf858cb5-ae51-4fcf-ad74-b1887946f70f"
    assert bicycle.count == 1
    assert bicycle.template_id is None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2

    brand = properties[0]
    assert brand.name == "brand"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"],
    )

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5, tzinfo=pytz.UTC),
        },
    )

    bob = Record(
        id=UUID("e2b71447-e29d-11c3-24c6-f2ebffd1486a"),
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name": "Bob",
            # Embedded linked property
            "mother": RecordStub(
                id=UUID("ecb71447-b684-c589-abda-b673c38edefc"), title="Alice"
            ),
        },
    )

    assert sorted(patients.results, key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"),
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={"brand": "Bianchi", "color": ["red", "blue"]},
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=True)
        ) == [
            ModelRelationship(
                id="2e754729-684a-4c45-960f-348d68737d4d",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # This relationship can be created in the Python client with the following:
        #
        # >>> patient = ds.models()["patient"]
        # >>> bike = ds.models()["bicycle"]
        # >>> bob = patient.get_all()[1]
        # >>> bianchi = bike.get_all()[0]
        # >>> bianchi.relate_to(bob, relationship_type="belongs_to")
        #
        # This reuses the `belongs_to` name even though that is disallowed through
        # the frontend. This means that the `belongs_to` CSV contains relationships
        # between proxy packages and records, *and* between records and records.
        assert list(
            db.get_outgoing_model_relationships_tx(tx, bicycle, one_to_many=True)
        ) == [
            ModelRelationship(
                id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to
        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="ccf200d3-e77f-4d9e-bed3-f1f28860152f",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships
        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True
            )
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Linked properties
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=False)
        ) == [
            ModelRelationship(
                id="443e141b-f59c-419f-82c1-eed97925b04d",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="fa3daedd-1761-4730-be7d-bb5de8e1261c",
                from_="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                to="ecb71447-b684-c589-abda-b673c38edefc",
                type="MOTHER",
                model_relationship_id="443e141b-f59c-419f-82c1-eed97925b04d",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="00b71de7-b42f-1fe9-a83f-824452fe966e",
                proxy_instance_id="460591a0-8079-4979-a860-c3a4b18a32ad",
                package_id=184418,
                package_node_id="N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by=creator,
                updated_by=creator,
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
def get_model(db: PartitionedDatabase, model_id_or_name: str) -> JsonDict:
    """
    Fetch the model identified by `model_id_or_name` from `db` and return the
    result of its `to_dict()` method.
    """
    model = db.get_model(model_id_or_name)
    return model.to_dict()
def get_all_properties(
    db: PartitionedDatabase, concept_id_or_name: str
) -> List[JsonDict]:
    """
    Return every property of the model identified by `concept_id_or_name`,
    each converted with `to_property_dict`, in the order `db.get_properties`
    yields them.
    """
    serialized = []
    for model_property in db.get_properties(concept_id_or_name):
        serialized.append(to_property_dict(model_property))
    return serialized
def publish_schema(
    db: PartitionedDatabase,
    tx: Transaction,
    config: PublishConfig,
    s3,
    file_manifests: List[FileManifest],
    proxy_relationship_names: List[RelationshipName],
) -> Tuple[ExportGraphSchema, FileManifest]:
    """
    Export the schema of the partitioned database into a `GraphSchema` instance.

    The schema contains one entry per model (its properties followed by its
    linked properties), an extra package-proxy model when `file_manifests` is
    non-empty, one entry per model relationship, and one placeholder
    relationship (empty `from_`/`to`) per name in `proxy_relationship_names`.
    The schema is written as JSON to `config.s3_bucket` under
    `config.s3_publish_key`.

    Returns the schema and the manifest describing the written schema file.

    Raises `AssertionError` if an exported model has the same name as the
    package-proxy model.
    """
    schema_models: List[ExportModel] = []
    schema_relationships: List[ExportModelRelationship] = []

    log.info("Exporting graph schema")

    models: List[Model] = db.get_models_tx(tx)
    model_index: Dict[UUID, Model] = {m.id: m for m in models}

    for m in models:
        log.info(f"Building schema for model '{m.name}'")
        properties: List[ModelProperty] = db.get_properties_tx(tx, m)
        linked_properties: List[ModelRelationship] = list(
            db.get_outgoing_model_relationships_tx(tx, from_model=m, one_to_many=False)
        )

        publish_properties: List[ExportProperty] = [
            ExportProperty.model_property(
                name=p.name,
                display_name=p.display_name,
                description=p.description,
                data_type=p.data_type,
            )
            for p in properties
        ] + [
            ExportProperty.linked_property(
                name=r.name,
                display_name=r.display_name,
                description=r.description,
                data_type=LinkedModelDataType(
                    to=model_index[r.to].name,
                    file=str(OutputFile.csv_for_model(m.name)),
                ),
            )
            # Only a missing index sorts last. `index or sys.maxsize` would also
            # push a legitimate index of 0 to the end.
            for r in sorted(
                linked_properties,
                key=lambda rel: rel.index if rel.index is not None else sys.maxsize,
            )
        ]

        model = ExportModel(
            model=m,
            name=m.name,
            display_name=m.display_name,
            description=m.description,
            properties=publish_properties,
        )
        schema_models.append(model)

    # If any packages exist in this dataset, add a special-cased "File" model
    if len(file_manifests) > 0:
        log.info("Building schema for proxy package model")
        proxy_package_model = ExportModel.package_proxy()

        # TODO: gracefully handle this case to avoid overwriting "files.csv"
        # The conflicting model has, by definition, the proxy model's name, so
        # report that name rather than whichever model the loop above saw last.
        assert not any(
            existing.name == proxy_package_model.name for existing in schema_models
        ), (
            f"Cannot export package proxy schema model with name '{proxy_package_model.name}' - "
            f"a model '{proxy_package_model.name}' already exists. See https://app.clickup.com/t/102ndc for issue"
        )
        schema_models.append(proxy_package_model)

    # NOTE(review): unlike the per-model call above, this call passes no
    # `from_model`. Confirm the method returns the relationships of every model
    # in that case.
    relationships = db.get_outgoing_model_relationships_tx(tx, one_to_many=True)

    for r in relationships:
        log.info(f"Building schema for relationship '{r.name}'")
        relationship = ExportModelRelationship(
            relationship=r,
            name=r.name,
            from_=model_index[r.from_].name,
            to=model_index[r.to].name,
        )
        schema_relationships.append(relationship)

    for p in proxy_relationship_names:
        log.info(f"Building schema for proxy relationship '{p}'")
        relationship = ExportModelRelationship(
            relationship=None, name=p, from_="", to=""
        )
        schema_relationships.append(relationship)

    schema = ExportGraphSchema(models=schema_models, relationships=schema_relationships)

    # Write "schema.json" to S3
    # ======================================================================
    schema_output_file = OutputFile.json_for_schema().with_prefix(
        os.path.join(config.s3_publish_key, METADATA)
    )
    s3.put_object(
        Bucket=config.s3_bucket,
        Key=str(schema_output_file),
        Body=schema.to_json(camel_case=True, pretty_print=True, drop_nulls=True),
        RequestPayer="requester",
    )

    schema_manifest = schema_output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, schema_output_file)
    )

    return schema, schema_manifest
def get_relationship(
    db: PartitionedDatabase,
    model_id_or_name: str,
    relationship_id: ModelRelationshipId,
) -> JsonDict:
    """
    Return the dict form of the model relationship `relationship_id`.

    `model_id_or_name` is accepted but not consulted by the lookup.

    Raises `NotFound` when `db` has no such relationship.
    """
    found = db.get_model_relationship(relationship_id)
    if found is not None:
        return found.to_dict()
    raise NotFound(f"Could not get model relationship [{relationship_id}]")
def delete_relationship(
    db: PartitionedDatabase, record_id: RecordId, relationship_id: RecordRelationshipId
) -> RecordRelationshipId:
    """
    Delete the outgoing relationship `relationship_id` of record `record_id`
    inside a single transaction and return what the database call returns.
    """
    with db.transaction() as tx:
        deleted = db.delete_outgoing_record_relationship_tx(
            tx, record_id, relationship_id
        )
        return deleted
parser.add_argument("--user-node-id", type=str, default=str(uuid.uuid4()))
parser.add_argument("--records", "-n", dest="n", type=int, default=1000)

args = parser.parse_args()

raw_db = Database.from_config(Config())

try:
    # Make sure the organization and dataset exist before any data is loaded.
    with raw_db.transaction() as tx:
        raw_db.initialize_organization_and_dataset(
            tx,
            organization_id=OrganizationId(args.organization_id),
            dataset_id=DatasetId(args.dataset_id),
            organization_node_id=OrganizationNodeId(args.organization_node_id),
            dataset_node_id=DatasetNodeId(args.dataset_node_id),
        )

    db = PartitionedDatabase(
        raw_db,
        OrganizationId(args.organization_id),
        DatasetId(args.dataset_id),
        UserNodeId(args.user_node_id),
        OrganizationNodeId(args.organization_node_id),
        DatasetNodeId(args.dataset_node_id),
    )

    load(db, args.input, verbose=True)
finally:
    # Close the driver even when initialization or loading fails.
    raw_db.driver.close()

print("done")
def get_proxy_instance(db: PartitionedDatabase, proxy_type: str, id_: UUID) -> JsonDict:
    """
    Look up the package proxy `id_` and serialize it with `to_proxy_instance`.

    The `proxy_type` argument is not used; the module-level `PROXY_TYPE` is
    always passed to the serializer.
    """
    package_proxy = db.get_package_proxy(id_)
    return to_proxy_instance(PROXY_TYPE, package_proxy)
def delete_proxy_instance(db: PartitionedDatabase, proxy_type: str, id_: UUID) -> None:
    """
    Delete the package proxy with ID `id_`.

    `proxy_type` is accepted but not used. Always returns `None`, which is why
    the return annotation is `None` and not a JSON dict.
    """
    db.delete_package_proxy_by_id(id_)
    return None
def delete_orphaned_datasets_impl(
    bf_database: PennsieveDatabase,
    db: Database,
    organization_id: int,
    dry_run: bool = True,
):
    """
    Delete datasets that exist in the model service but not in the API database.

    A dataset is orphaned when its ID is returned by `db.get_dataset_ids` for
    the organization but not by `bf_database.get_dataset_ids`. Each orphan is
    asserted to be either missing from the API database or in state
    "DELETING". With `dry_run=True` (the default) the orphans are only logged;
    otherwise each one is deleted in batches.

    Raises `AssertionError` if an orphan exists in the API database in any
    other state, and re-raises the last error once batch deletion has failed
    five times in a row.
    """

    def completely_delete(partitioned_db):
        """Run batched deletes until the dataset is gone; return the final summary."""
        cumulative_counts = DatasetDeletionCounts.empty()
        sequential_failures = 0

        while True:
            try:
                summary = partitioned_db.delete_dataset(batch_size=1000, duration=2000)
                if summary.done:
                    return summary.update_counts(cumulative_counts)
                cumulative_counts = cumulative_counts.update(summary.counts)
                sequential_failures = 0
                time.sleep(0.5)
            except Exception as e:
                sequential_failures += 1
                log.warning(f"FAIL({sequential_failures}): {str(e)}")
                # Give up before waiting: there is no retry left to wait for.
                if sequential_failures >= 5:
                    raise
                log.warning("WAITING 2s")
                time.sleep(2.0)

    model_service_dataset_ids = db.get_dataset_ids(OrganizationId(organization_id))
    api_dataset_ids = bf_database.get_dataset_ids(organization_id)
    orphaned_dataset_ids = set(model_service_dataset_ids) - set(api_dataset_ids)

    if dry_run:
        banner = "*" * 20
        log.info(f"{banner} DRY RUN {banner}")

    log.info(f"Found {len(orphaned_dataset_ids)} orphaned dataset(s)")

    for dataset_id in orphaned_dataset_ids:
        ds = bf_database.get_dataset(organization_id, dataset_id)
        assert ds is None or ds.state == "DELETING"

        child_count = db.count_child_nodes(organization_id, dataset_id)
        log.info(
            f"Deleting: organization={organization_id} / dataset={dataset_id} ({child_count}) => {ds}"
        )

        if dry_run:
            continue

        partitioned_db = PartitionedDatabase(
            db,
            OrganizationId(organization_id),
            DatasetId(dataset_id),
            UserNodeId("dataset-delete-migration"),
        )
        summary = completely_delete(partitioned_db)
        log.info(str(summary))

    log.info("Done")
def migrate_dataset(
    organization_id: int,
    dataset_ids: Optional[List[int]] = None,
    remove_existing: bool = False,
    environment: str = "dev",
    jumpbox: Optional[str] = "non-prod",
    smoke_test: bool = True,
    remap_ids: bool = False,
):
    """
    Migrate datasets of an organization from Neptune to Neo4j via S3.

    When `dataset_ids` is None every dataset of the organization is migrated
    and the organization is toggled over afterwards. In that mode neither
    `remove_existing` nor `remap_ids` may be set. Each dataset is locked in
    the Pennsieve database for the duration of its export and import, and is
    unlocked again even if a step fails.

    Raises `Exception` for the two disallowed argument combinations.
    """
    # TODO does this need node IDs?
    entire_organization = dataset_ids is None

    if entire_organization and remove_existing:
        raise Exception(
            f"Cannot remove existing data from Neo4j while migrating the entire organization {organization_id}"
        )
    if entire_organization and remap_ids:
        raise Exception("Can only remap IDs for a single dataset")

    settings = SSMParameters(environment)

    with SSHTunnel(
        remote_host=settings.postgres_host,
        remote_port=settings.postgres_port,
        local_port=7777,
        jumpbox=jumpbox,
    ) as postgres_tunnel, SSHTunnel(
        remote_host=settings.neo4j_host,
        remote_port=settings.neo4j_port,
        local_port=8888,
        jumpbox=jumpbox,
    ) as neo4j_tunnel:

        postgres_url = (
            f"postgresql://{settings.postgres_user}:{settings.postgres_password}"
            f"@{postgres_tunnel.host}:{postgres_tunnel.port}/{settings.postgres_db}"
        )
        engine, factory = migrate_db.get_postgres(postgres_url)

        bolt_uri = f"bolt://{neo4j_tunnel.host}:{neo4j_tunnel.port}"
        neo4j = Database(
            uri=bolt_uri,
            user=settings.neo4j_user,
            password=settings.neo4j_password,
            max_connection_lifetime=300,
        )

        bf_database = PennsieveDatabase(engine, factory, organization_id)

        # 1) Get the target datasets for the migration
        if dataset_ids is None:
            dataset_ids = bf_database.get_dataset_ids(organization_id)

        for dataset_id in dataset_ids:
            print(f"Migrating dataset {dataset_id}")

            partitioned_db = PartitionedDatabase(
                neo4j,
                organization_id=organization_id,
                dataset_id=dataset_id,
                user_id=0,
            )

            # 3) Lock dataset in Pennsieve DB
            bf_database.lock_dataset(organization_id, dataset_id)
            print(f"Got dataset {dataset_id}")

            try:
                # 4) Export data to S3 from Neptune
                export_from_neptune(
                    settings,
                    postgres_tunnel=postgres_tunnel,
                    organization_id=organization_id,
                    dataset_id=dataset_id,
                    jumpbox=jumpbox,
                    smoke_test=smoke_test,
                )

                # 5) Import into Neo4j from S3
                import_to_neo4j.load(
                    dataset=f"{organization_id}/{dataset_id}",
                    bucket=settings.export_bucket,
                    db=partitioned_db,
                    cutover=True,
                    remove_existing=remove_existing,
                    smoke_test=smoke_test,
                    remap_ids=remap_ids,
                )
            finally:
                # Whatever happens, unlock the dataset
                bf_database.unlock_dataset(organization_id, dataset_id)

        # 6) Sanity check that all datasets in the organization have been
        # migrated, then mark the organization as migrated.
        if entire_organization:
            print("Validating migration....")
            for dataset_id in bf_database.get_dataset_ids(organization_id):
                # NOTE(review): the database handle is built but nothing is
                # checked with it here. Confirm whether a validation is missing.
                partitioned_db = PartitionedDatabase(
                    neo4j,
                    organization_id=organization_id,
                    dataset_id=dataset_id,
                    user_id=0,
                )

            neo4j.toggle_service_for_organization(organization_id=organization_id)

    print("Done.")
def test_rewrite_ids_and_import(neo4j):
    """
    Test that UUIDs are remapped to the exact correct place with a manually
    defined remapping.

    Dataset `5/60000` is loaded with `remap_ids=True` and a deterministic
    `generate_new_id`, then every model, property, record, relationship and
    package proxy is checked against the remapped IDs.
    """
    dataset_id = 60000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    # Every expected object below carries this user as creator and updater.
    creator = "N:user:06080380-fb56-46eb-8c70-f24112aff878"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    # Exported ID -> ID expected after the import.
    REMAPPING = {
        "0b4b3615-9eaf-425d-9727-bcac29686fd5": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        "7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        "e507b3ef-ade4-4672-83b4-f3f0774fb282": "cccccccc-cccc-cccc-cccc-cccccccccccc",
        "bf858cb5-ae51-4fcf-ad74-b1887946f70f": "dddddddd-dddd-dddd-dddd-dddddddddddd",
        "a99b09f5-caa6-4282-aa0e-cf56bde89254": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
        "42fa4eb9-51cc-4c59-b550-ac24d6d5024a": "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "ecb71447-b684-c589-abda-b673c38edefc": "00000000-0000-0000-0000-000000000000",
        "e2b71447-e29d-11c3-24c6-f2ebffd1486a": "11111111-1111-1111-1111-111111111111",
        "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50": "22222222-2222-2222-2222-222222222222",
        "2e754729-684a-4c45-960f-348d68737d4d": "33333333-3333-3333-3333-333333333333",
        "175ff55b-b44d-4381-bd59-d4dbc0b9c5f0": "44444444-4444-4444-4444-444444444444",
        "ccf200d3-e77f-4d9e-bed3-f1f28860152f": "55555555-5555-5555-5555-555555555555",
        "443e141b-f59c-419f-82c1-eed97925b04d": "66666666-6666-6666-6666-666666666666",
        "d0b71de9-21f9-3557-edda-ad278dd81dc0": "77777777-7777-7777-7777-777777777777",
        "aeb7476e-55f6-7924-5e43-a83cfa7e4cef": "88888888-8888-8888-8888-888888888888",
        "fa3daedd-1761-4730-be7d-bb5de8e1261c": "99999999-9999-9999-9999-999999999999",
        "00b71de7-b42f-1fe9-a83f-824452fe966e": "aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
        "460591a0-8079-4979-a860-c3a4b18a32ad": "aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
    }

    def generate_new_id(old_id):
        # IDs without an explicit remapping are kept unchanged.
        return REMAPPING.get(old_id, old_id)

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
        generate_new_id=generate_new_id,
    )

    # Models
    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="cccccccc-cccc-cccc-cccc-cccccccccccc",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "dddddddd-dddd-dddd-dddd-dddddddddddd"
    assert bicycle.count == 1
    assert bicycle.template_id is None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2

    brand = properties[0]
    assert brand.name == "brand"
    assert brand.id == "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"],
    )
    assert color.id == "ffffffff-ffff-ffff-ffff-ffffffffffff"

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("00000000-0000-0000-0000-000000000000"),
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5, tzinfo=pytz.UTC),
        },
    )

    bob = Record(
        id=UUID("11111111-1111-1111-1111-111111111111"),
        created_by=creator,
        updated_by=creator,
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name": "Bob",
            # Embedded linked property
            "mother": RecordStub(
                id=UUID("00000000-0000-0000-0000-000000000000"), title="Alice"
            ),
        },
    )

    assert sorted(patients.results, key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("22222222-2222-2222-2222-222222222222"),
            created_by=creator,
            updated_by=creator,
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={"brand": "Bianchi", "color": ["red", "blue"]},
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=True)
        ) == [
            ModelRelationship(
                id="33333333-3333-3333-3333-333333333333",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="dddddddd-dddd-dddd-dddd-dddddddddddd",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        assert list(
            db.get_outgoing_model_relationships_tx(tx, bicycle, one_to_many=True)
        ) == [
            ModelRelationship(
                id="44444444-4444-4444-4444-444444444444",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="dddddddd-dddd-dddd-dddd-dddddddddddd",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to
        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="55555555-5555-5555-5555-555555555555",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships
        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="77777777-7777-7777-7777-777777777777",
                from_="00000000-0000-0000-0000-000000000000",
                to="22222222-2222-2222-2222-222222222222",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="33333333-3333-3333-3333-333333333333",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "22222222-2222-2222-2222-222222222222", one_to_many=True
            )
        ) == [
            RecordRelationship(
                id="88888888-8888-8888-8888-888888888888",
                from_="22222222-2222-2222-2222-222222222222",
                to="11111111-1111-1111-1111-111111111111",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="44444444-4444-4444-4444-444444444444",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Linked properties
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=False)
        ) == [
            ModelRelationship(
                id="66666666-6666-6666-6666-666666666666",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="99999999-9999-9999-9999-999999999999",
                from_="11111111-1111-1111-1111-111111111111",
                to="00000000-0000-0000-0000-000000000000",
                type="MOTHER",
                model_relationship_id="66666666-6666-6666-6666-666666666666",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by=creator,
                updated_by=creator,
            )
        ]

    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
                proxy_instance_id="aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
                package_id=184418,
                package_node_id="N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by=creator,
                updated_by=creator,
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
def test_fix_multiple_mismatched_model_relationships(neo4j):
    """
    Two record relationships claim a schemaRelationshipId that does not match
    their own (from)-[relationship]->(to) triple, although a compatible model
    relationship exists in the dataset.

    First, an extra model relationship
    (id = `4ea7d9e6-b2ae-487c-981c-8202e37343f9`) connects
    (bike)-[rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c]->(bike), and the
    record relationship (person)-[rides]->(bike) wrongly points at that
    schemaRelationshipId.

    Second, another record relationship of the same relationship type claims
    the same schemaRelationshipId while actually connecting different models.

    Old versions of the graph-ingest tool produced this corrupted data.
    """
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    dataset_id = 50000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    # Integer user id; only the user *node* id below is handed to the database.
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )
    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Values that recur in the expected relationships below.
    person_id = "ecb71447-b684-c589-abda-b673c38edefc"
    bike_id = "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"
    bike_target_id = "e2b71447-e29d-11c3-24c6-f2ebffd1486a"
    author = "N:user:06080380-fb56-46eb-8c70-f24112aff878"
    rides_name = "rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c"
    rides_timestamp = "2019-11-05T13:47:46.032Z"
    belongs_to_timestamp = "2019-11-21T16:47:36.938Z"

    with db.transaction() as tx:
        from_person = list(db.get_outgoing_record_relationships_tx(tx, person_id))
        assert from_person == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_=person_id,
                to=bike_id,
                type="RIDES",
                name=rides_name,
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse(rides_timestamp),
                updated_at=iso8601.parse(rides_timestamp),
                created_by=author,
                updated_by=author,
            )
        ]

        # Sort by relationship type so the comparison does not depend on the
        # order in which the database yields the two relationships.
        from_bike = sorted(
            db.get_outgoing_record_relationships_tx(tx, bike_id, one_to_many=True),
            key=lambda rel: rel.type,
        )
        assert from_bike == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_=bike_id,
                to=bike_target_id,
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse(belongs_to_timestamp),
                updated_at=iso8601.parse(belongs_to_timestamp),
                created_by=author,
                updated_by=author,
            ),
            RecordRelationship(
                id="83227fcd-ec4d-47e6-a3a9-4c9fbba4a2f3",
                from_=bike_id,
                to=bike_target_id,
                type="RIDES",
                name=rides_name,
                model_relationship_id="92b6a32f-0597-4a7a-944b-27b39998283e",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse(rides_timestamp),
                updated_at=iso8601.parse(rides_timestamp),
                created_by=author,
                updated_by=author,
            ),
        ]
def test_fix_nonexistent_model_relationships(neo4j):
    """
    A record relationship carries a schemaRelationshipId that does not exist,
    but a model relationship (d0b71de9-21f9-3557-edda-ad278dd81dc0) is defined
    that matches the (from)-[relationship]->(to) triple of the relationship.

    Old versions of the graph-ingest tool produced this corrupted data.
    """
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    dataset_id = 30000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    # Integer user id; only the user *node* id below is handed to the database.
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )
    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Values that recur in the expected relationships below.
    person_id = "ecb71447-b684-c589-abda-b673c38edefc"
    bike_id = "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"
    author = "N:user:06080380-fb56-46eb-8c70-f24112aff878"
    rides_timestamp = "2019-11-05T13:47:46.032Z"
    belongs_to_timestamp = "2019-11-21T16:47:36.938Z"

    with db.transaction() as tx:
        from_person = list(db.get_outgoing_record_relationships_tx(tx, person_id))
        assert from_person == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_=person_id,
                to=bike_id,
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse(rides_timestamp),
                updated_at=iso8601.parse(rides_timestamp),
                created_by=author,
                updated_by=author,
            )
        ]

        from_bike = list(
            db.get_outgoing_record_relationships_tx(tx, bike_id, one_to_many=True)
        )
        assert from_bike == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_=bike_id,
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse(belongs_to_timestamp),
                updated_at=iso8601.parse(belongs_to_timestamp),
                created_by=author,
                updated_by=author,
            )
        ]
def get_concept(db: PartitionedDatabase, concept_id_or_name: str) -> JsonDict:
    """
    Fetch a single model by id or name and render it as a concept dict.

    The model and its property count are read inside one transaction and
    passed to `to_concept_dict`.
    """
    with db.transaction() as tx:
        concept = db.get_model_tx(tx, concept_id_or_name)
        # The counts lookup is keyed by model id; ask for just this model.
        counts_by_model = db.get_property_counts_tx(tx, [concept.id])
        return to_concept_dict(concept, counts_by_model[concept.id])
def get_proxy_relationship_counts(
    db: PartitionedDatabase, proxy_type: str, node_id: str
) -> List[JsonDict]:
    """
    Return the proxy relationship counts for a package as a list of dicts.

    `node_id` is wrapped in `PackageNodeId` before being handed to the
    database. `proxy_type` is accepted but not consulted here.
    """
    counts = db.get_proxy_relationship_counts(PackageNodeId(node_id))
    serialized = []
    for entry in counts:
        serialized.append(entry.to_dict())
    return serialized
def delete_package_proxy(
    db: PartitionedDatabase, record_id: RecordId, package_id: PackageId
) -> JsonDict:
    """
    Delete the package proxy identified by `record_id` and `package_id`.

    Returns the `to_dict()` form of whatever `db.delete_package_proxy` yields.
    """
    removed = db.delete_package_proxy(record_id, package_id)
    return removed.to_dict()
def delete_schema_linked_property(
    db: PartitionedDatabase, id_: str, link_id: ModelRelationshipId
):
    """
    Delete the model relationship (linked property) identified by `link_id`.

    `id_`, the id of the owning model/concept, is deliberately unused: the
    relationship id alone is passed to the database.
    """
    with db.transaction() as tx:
        outcome = db.delete_model_relationship_tx(tx=tx, relationship=link_id)
        return outcome
def get_record(db: PartitionedDatabase, record_id: RecordId, linked: bool) -> JsonDict:
    """
    Fetch one record and return it as a dict.

    `linked` is forwarded as `embed_linked` to `db.get_record`.

    Raises:
        NotFound: when the database returns `None` for `record_id`.
    """
    found = db.get_record(record_id, embed_linked=linked)
    if found is not None:
        return found.to_dict()
    raise NotFound(f"Could not get record {record_id}")
def get_all_schema_linked_properties(db: PartitionedDatabase):
    """
    List every linked property (non one-to-many model relationship) as a
    schema linked property target.
    """
    with db.transaction() as tx:
        # NOTE(review): called without a source model, unlike other call
        # sites that pass one positionally - presumably the model argument
        # is optional; confirm against PartitionedDatabase.
        linked = db.get_outgoing_model_relationships_tx(tx=tx, one_to_many=False)
        return list(map(to_schema_linked_property_target, linked))
def delete_dataset(
    db: PartitionedDatabase, batch_size: int = 1000, duration: int = 5000
) -> JsonDict:
    """
    Delete the dataset behind `db` and return the result as a dict.

    `batch_size` and `duration` are forwarded unchanged to
    `db.delete_dataset`.
    """
    outcome = db.delete_dataset(batch_size=batch_size, duration=duration)
    return outcome.to_dict()
def get_relationship(
    db: PartitionedDatabase,
    record_id: RecordId,
    relationship_id: RecordRelationshipId,
) -> JsonDict:
    """
    Return one outgoing record relationship of `record_id` as a dict.

    The lookup runs inside a single database transaction.
    """
    with db.transaction() as tx:
        relationship = db.get_outgoing_record_relationship_tx(
            tx, record_id, relationship_id
        )
        return relationship.to_dict()
def publish_dataset(
    db: PartitionedDatabase,
    s3,
    config: PublishConfig,
    file_manifests: List[FileManifest],
) -> List[FileManifest]:
    """
    Publish the dataset provided by the partitioned database view in `db`.

    Everything is read inside a single database transaction. The steps run in
    this order: schema, one records export per model, the source-file export
    (only when `file_manifests` is non-empty), then one relationships export
    per distinct relationship name.

    Args:
        db: partitioned view of the dataset to publish.
        s3: S3 client handed through to the individual publish helpers.
        config: publish configuration handed through to the helpers.
        file_manifests: manifests of the dataset's already-known files.

    Returns:
        A list of file manifests for each file written to S3, in the order the
        files were published.
    """
    graph_manifests: List[FileManifest] = []

    with db.transaction() as tx:

        # 0) Get all existing proxy relationships. We'll need these in a few places.
        # ======================================================================
        proxies_by_relationship: Dict[
            RelationshipName, List[PackageProxyRelationship]
        ] = defaultdict(list)

        for pp in package_proxy_relationships(db, tx, config, s3, file_manifests):
            proxies_by_relationship[pp.relationship].append(pp)

        # 1) Publish graph schema
        # ======================================================================
        schema, schema_manifest = publish_schema(
            db, tx, config, s3, file_manifests, list(proxies_by_relationship)
        )
        graph_manifests.append(schema_manifest)

        # 2) Publish record CSVs
        # ======================================================================
        for m in schema.models:
            if m.model:
                graph_manifests.append(
                    publish_records_of_model(db, tx, m.model, config, s3)
                )

        # 3) Publish source file CSV
        # ======================================================================
        if file_manifests:
            graph_manifests.append(
                publish_package_proxy_files(db, tx, config, s3, file_manifests)
            )

        # 4) Publish relationship CSVs
        # ======================================================================

        # All relationships with the same name need to go in the same CSV file,
        # along with proxy relationships with the same name.
        relationships_by_name: Dict[
            RelationshipName, List[ModelRelationship]
        ] = defaultdict(list)

        for r in schema.relationships:
            if not r.is_proxy_relationship():
                # Narrows the Optional for the type checker: a non-proxy
                # entry is expected to carry its model relationship.
                assert r.relationship is not None
                relationships_by_name[r.name].append(r.relationship)

        # De-duplicate names while keeping first-seen order. A `set` would
        # make the publish order (and so the returned manifest order) vary
        # between runs. The names are materialised up front because indexing
        # the defaultdicts below may insert new keys.
        relationship_names = list(
            dict.fromkeys(chain(relationships_by_name, proxies_by_relationship))
        )

        for relationship_name in relationship_names:
            graph_manifests.append(
                publish_relationships(
                    db,
                    tx,
                    relationship_name,
                    relationships_by_name[relationship_name],
                    proxies_by_relationship[relationship_name],
                    config,
                    s3,
                )
            )

    return graph_manifests
)
# Target bucket option; the `env_action("S3_BUCKET")` action presumably lets
# the value come from the S3_BUCKET environment variable - confirm against
# `env_action`.
parser.add_argument(
    "--s3-bucket",
    type=str,
    help="AWS S3 target bucket, either for embargoed or published datasets",
    action=env_action("S3_BUCKET"),
)

# Script entry point: parse the CLI arguments, build the database view and S3
# client, then publish the dataset.
if __name__ == "__main__":

    configure_logging("INFO")

    args = parser.parse_args()

    # Database view scoped to the organization/dataset given on the command
    # line.
    db = PartitionedDatabase.get_from_env(
        organization_id=OrganizationId(args.organization_id),
        dataset_id=DatasetId(args.dataset_id),
        user_id=UserNodeId(args.user_node_id),
        organization_node_id=OrganizationNodeId(args.organization_node_id),
        dataset_node_id=DatasetNodeId(args.dataset_node_id),
    )

    # The S3 client is pinned to the us-east-1 region.
    s3 = boto3.client("s3", region_name="us-east-1")

    config = PublishConfig(s3_publish_key=args.s3_publish_key, s3_bucket=args.s3_bucket)

    # Load the existing file manifests, then publish the graph alongside them.
    file_manifests = read_file_manifests(s3, config)

    graph_manifests = publish_dataset(db, s3, config, file_manifests=file_manifests)
def get_all_models(db: PartitionedDatabase) -> List[JsonDict]:
    """
    Return every model known to `db`, each converted with `to_dict()`.
    """
    serialized = []
    for model in db.get_models():
        serialized.append(model.to_dict())
    return serialized
def get_topology(db: PartitionedDatabase, id_: str) -> List[JsonDict]:
    """
    Return the topology for `id_` in the legacy format.

    Each entry yielded by `db.topology(id_)` is converted with
    `to_legacy_topology`.

    Returns:
        A list with one converted entry per topology item. The annotation
        previously said a single `JsonDict`, but the function has always
        returned a list. Elements are presumably `JsonDict` - confirm against
        `to_legacy_topology`.
    """
    return [to_legacy_topology(t) for t in db.topology(id_)]
def get_all_properties(
    db: PartitionedDatabase, model_id_or_name: str
) -> List[JsonDict]:
    """
    Return the properties of the given model, each converted with `to_dict()`.
    """
    properties = db.get_properties(model_id_or_name)
    serialized = []
    for prop in properties:
        serialized.append(prop.to_dict())
    return serialized
def get_all_concepts(db: PartitionedDatabase) -> List[JsonDict]:
    """
    Return every model as a concept dict, including its property count.

    Models and their property counts are read in the same transaction.
    """
    with db.transaction() as tx:
        models = db.get_models_tx(tx)
        model_ids = [model.id for model in models]
        counts_by_model = db.get_property_counts_tx(tx, model_ids)

        concepts = []
        for model in models:
            concepts.append(to_concept_dict(model, counts_by_model[model.id]))
        return concepts
def get_graph_summary(db: PartitionedDatabase) -> JsonDict:
    """
    Return the result of `db.summarize()` converted with `to_dict()`.
    """
    summary = db.summarize()
    return summary.to_dict()
def test_rewrite_ids_randomly_and_import(neo4j):
    """
    Test that UUIDs are remapped using the default random remapper.

    The dataset is loaded with `remap_ids=True`. The test then checks that the
    same models, records and relationships exist, but that model and record
    ids no longer equal their exported values.

    Ids are compared through `str(...)`. A `UUID` never equals a `str`, so a
    `!=` between the two types would pass even if the id had not changed.
    """
    dataset_id = 70000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
    )

    # Models
    patient = db.get_model("patient")
    assert patient is not None
    assert len(db.get_properties(patient)) == 2
    assert str(patient.id) != "0b4b3615-9eaf-425d-9727-bcac29686fd5"

    bicycle = db.get_model("bicycle")
    assert bicycle is not None
    assert len(db.get_properties(bicycle)) == 2
    assert str(bicycle.id) != "bf858cb5-ae51-4fcf-ad74-b1887946f70f"

    # Records
    patients = db.get_all_records("patient").results

    alice = next(r for r in patients if r.values["name"] == "Alice")
    assert str(alice.id) != "ecb71447-b684-c589-abda-b673c38edefc"

    bob = next(r for r in patients if r.values["name"] == "Bob")
    assert str(bob.id) != "e2b71447-e29d-11c3-24c6-f2ebffd1486a"

    bicycles = db.get_all_records("bicycle").results
    assert len(bicycles) == 1
    bianchi = bicycles[0]
    assert str(bianchi.id) != "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"

    # Model relationships
    with db.transaction() as tx:
        assert (
            len(
                list(
                    db.get_outgoing_model_relationships_tx(
                        tx, patient, one_to_many=True
                    )
                )
            )
            == 1
        )
        assert (
            len(
                list(
                    db.get_outgoing_model_relationships_tx(
                        tx, bicycle, one_to_many=True
                    )
                )
            )
            == 1
        )

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to
        assert len(list(db.get_model_relationship_stubs_tx(tx))) == 1

        # Duplicate @RELATED_TO relationships are created
        assert len(list(db.get_model_relationships_tx(tx, one_to_many=True))) == 2

        # Record relationships
        assert len(list(db.get_outgoing_record_relationships_tx(tx, alice))) == 1
        assert (
            len(
                list(
                    db.get_outgoing_record_relationships_tx(
                        tx, bianchi, one_to_many=True
                    )
                )
            )
            == 1
        )

        # Linked properties
        assert (
            len(
                list(
                    db.get_outgoing_model_relationships_tx(
                        tx, patient, one_to_many=False
                    )
                )
            )
            == 1
        )

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(list(db.get_model_relationships_tx(tx, one_to_many=False))) == 1

        assert (
            len(
                list(
                    db.get_outgoing_record_relationships_tx(
                        tx, bob, one_to_many=False
                    )
                )
            )
            == 1
        )

    # The call returns a pair; its second element is the list of proxies.
    assert len(db.get_package_proxies_for_record(alice, limit=10, offset=0)[1]) == 1

    # Packages link directly to dataset node
    assert db.count_packages() == 1