Exemplo n.º 1
0
def test_loader(neo4j):
    dataset_id = 29233
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Models

    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="0b4b3615-9eaf-425d-9727-bcac29686fd5",
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="e507b3ef-ade4-4672-83b4-f3f0774fb282",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "bf858cb5-ae51-4fcf-ad74-b1887946f70f"
    assert bicycle.count == 1
    assert bicycle.template_id == None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2
    brand = properties[0]
    assert brand.name == "brand"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"])

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5,
                                    tzinfo=pytz.UTC),
        },
    )
    bob = Record(
        id=UUID("e2b71447-e29d-11c3-24c6-f2ebffd1486a"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name":
            "Bob",
            # Embedded linked property
            "mother":
            RecordStub(id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
                       title="Alice"),
        },
    )
    assert sorted(patients.results,
                  key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"),
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={
                "brand": "Bianchi",
                "color": ["red", "blue"]
            },
        )
    ]

    # Model relationships
    with db.transaction() as tx:

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="2e754729-684a-4c45-960f-348d68737d4d",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # This relationship can be created in the Python client with the following:
        #
        #  >>> patient = ds.models()["patient"]
        #  >>> bike = ds.models()["bicycle"]
        #  >>> bob = patient.get_all()[1]
        #  >>> bianchi = bike.get_all()[0]
        #  >>> bianchi.relate_to(bob, relationship_type="belongs_to")
        #
        # This reuses the `belongs_to` name even though that is disallowed through
        # the frontend.  This means that the `belongs_to` CSV contains relationships
        # between proxy packages and records, *and* between records and records.

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   bicycle,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to

        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="ccf200d3-e77f-4d9e-bed3-f1f28860152f",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships

        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True)
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Linked properties

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=False)
        ) == [
            ModelRelationship(
                id="443e141b-f59c-419f-82c1-eed97925b04d",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="fa3daedd-1761-4730-be7d-bb5de8e1261c",
                from_="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                to="ecb71447-b684-c589-abda-b673c38edefc",
                type="MOTHER",
                model_relationship_id="443e141b-f59c-419f-82c1-eed97925b04d",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="00b71de7-b42f-1fe9-a83f-824452fe966e",
                proxy_instance_id="460591a0-8079-4979-a860-c3a4b18a32ad",
                package_id=184418,
                package_node_id=
                "N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
Exemplo n.º 2
0
def get_model(db: PartitionedDatabase, model_id_or_name: str) -> JsonDict:
    return db.get_model(model_id_or_name).to_dict()
Exemplo n.º 3
0
def get_all_properties(
    db: PartitionedDatabase, concept_id_or_name: str
) -> List[JsonDict]:
    return [to_property_dict(p) for p in db.get_properties(concept_id_or_name)]
Exemplo n.º 4
0
def publish_schema(
    db: PartitionedDatabase,
    tx: Transaction,
    config: PublishConfig,
    s3,
    file_manifests: List[FileManifest],
    proxy_relationship_names: List[RelationshipName],
) -> Tuple[ExportGraphSchema, FileManifest]:
    """
    Export the schema of the partitioned database into a `GraphSchema`
    instance.
    """
    schema_models: List[ExportModel] = []
    schema_relationships: List[ExportModelRelationship] = []

    log.info("Exporting graph schema")

    models: List[Model] = db.get_models_tx(tx)
    model_index: Dict[UUID, Model] = {m.id: m for m in models}

    for m in models:
        log.info(f"Building schema for model '{m.name}'")
        properties: List[ModelProperty] = db.get_properties_tx(tx, m)
        linked_properties: List[ModelRelationship] = list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   from_model=m,
                                                   one_to_many=False))

        publish_properties: List[ExportProperty] = [
            ExportProperty.model_property(
                name=p.name,
                display_name=p.display_name,
                description=p.description,
                data_type=p.data_type,
            ) for p in properties
        ] + [
            ExportProperty.linked_property(
                name=r.name,
                display_name=r.display_name,
                description=r.description,
                data_type=LinkedModelDataType(
                    to=model_index[r.to].name,
                    file=str(OutputFile.csv_for_model(m.name)),
                ),
            ) for r in sorted(linked_properties,
                              key=lambda l: l.index or sys.maxsize)
        ]

        model = ExportModel(
            model=m,
            name=m.name,
            display_name=m.display_name,
            description=m.description,
            properties=publish_properties,
        )
        schema_models.append(model)

    # If any packages exist in this dataset, add a special-cased "File" model
    if len(file_manifests) > 0:
        log.info(f"Building schema for proxy package model")
        proxy_package_model = ExportModel.package_proxy()

        # TODO: gracefully handle this case to avoid overwriting "files.csv"
        assert not any(
            m.name == proxy_package_model.name for m in schema_models
        ), (f"Cannot export package proxy schema model with name '{proxy_package_model.name}' - "
            f"a model '{m.name}' already exists. See https://app.clickup.com/t/102ndc for issue"
            )
        schema_models.append(proxy_package_model)

    relationships = db.get_outgoing_model_relationships_tx(tx,
                                                           one_to_many=True)

    for r in relationships:
        log.info(f"Building schema for relationship '{r.name}'")
        relationship = ExportModelRelationship(
            relationship=r,
            name=r.name,
            from_=model_index[r.from_].name,
            to=model_index[r.to].name,
        )
        schema_relationships.append(relationship)

    for p in proxy_relationship_names:
        log.info(f"Building schema for proxy relationship '{p}'")
        relationship = ExportModelRelationship(relationship=None,
                                               name=p,
                                               from_="",
                                               to="")
        schema_relationships.append(relationship)

    schema = ExportGraphSchema(models=schema_models,
                               relationships=schema_relationships)

    # Write "schema.json" to S3
    # ======================================================================
    schema_output_file = OutputFile.json_for_schema().with_prefix(
        os.path.join(config.s3_publish_key, METADATA))
    s3.put_object(
        Bucket=config.s3_bucket,
        Key=str(schema_output_file),
        Body=schema.to_json(camel_case=True,
                            pretty_print=True,
                            drop_nulls=True),
        RequestPayer="requester",
    )
    schema_manifest = schema_output_file.with_prefix(METADATA).as_manifest(
        size_of(s3, config.s3_bucket, schema_output_file))

    return schema, schema_manifest
Exemplo n.º 5
0
def get_relationship(db: PartitionedDatabase, model_id_or_name: str,
                     relationship_id: ModelRelationshipId) -> JsonDict:
    relationship = db.get_model_relationship(relationship_id)
    if relationship is None:
        raise NotFound(f"Could not get model relationship [{relationship_id}]")
    return relationship.to_dict()
Exemplo n.º 6
0
def delete_relationship(
    db: PartitionedDatabase, record_id: RecordId, relationship_id: RecordRelationshipId
) -> RecordRelationshipId:

    with db.transaction() as tx:
        return db.delete_outgoing_record_relationship_tx(tx, record_id, relationship_id)
Exemplo n.º 7
0
    parser.add_argument("--user-node-id", type=str, default=str(uuid.uuid4()))
    parser.add_argument("--records", "-n", dest="n", type=int, default=1000)

    args = parser.parse_args()

    raw_db = Database.from_config(Config())

    with raw_db.transaction() as tx:
        raw_db.initialize_organization_and_dataset(
            tx,
            organization_id=OrganizationId(args.organization_id),
            dataset_id=DatasetId(args.dataset_id),
            organization_node_id=OrganizationNodeId(args.organization_node_id),
            dataset_node_id=DatasetNodeId(args.dataset_node_id),
        )

    db = PartitionedDatabase(
        raw_db,
        OrganizationId(args.organization_id),
        DatasetId(args.dataset_id),
        UserNodeId(args.user_node_id),
        OrganizationNodeId(args.organization_node_id),
        DatasetNodeId(args.dataset_node_id),
    )

    load(db, args.input, verbose=True)

    db.db.driver.close()

    print("done")
Exemplo n.º 8
0
def get_proxy_instance(db: PartitionedDatabase, proxy_type: str,
                       id_: UUID) -> JsonDict:

    return to_proxy_instance(PROXY_TYPE, db.get_package_proxy(id_))
Exemplo n.º 9
0
def delete_proxy_instance(db: PartitionedDatabase, proxy_type: str,
                          id_: UUID) -> JsonDict:

    db.delete_package_proxy_by_id(id_)
    return None
Exemplo n.º 10
0
def delete_orphaned_datasets_impl(
    bf_database: PennsieveDatabase,
    db: Database,
    organization_id: int,
    dry_run: bool = True,
):
    def completely_delete(partitioned_db):
        cumulative_counts = DatasetDeletionCounts.empty()
        sequential_failures = 0

        while True:
            try:
                summary = partitioned_db.delete_dataset(batch_size=1000,
                                                        duration=2000)
                if summary.done:
                    return summary.update_counts(cumulative_counts)
                else:
                    cumulative_counts = cumulative_counts.update(
                        summary.counts)
                sequential_failures = 0
                time.sleep(0.5)
            except Exception as e:
                sequential_failures += 1
                log.warn("FAIL({sequential_failures}): {str(e)}")
                log.warn("WAITING 2s")
                time.sleep(2.0)
                if sequential_failures >= 5:
                    raise e

    model_service_dataset_ids = db.get_dataset_ids(
        OrganizationId(organization_id))
    api_dataset_ids = bf_database.get_dataset_ids(organization_id)
    orphaned_dataset_ids = set(model_service_dataset_ids) - set(
        api_dataset_ids)

    if dry_run:
        log.info(f"""{"*" * 20} DRY RUN {"*" * 20}""")
        log.info(f"Found {len(orphaned_dataset_ids)} orphaned dataset(s)")
        for dataset_id in orphaned_dataset_ids:
            ds = bf_database.get_dataset(organization_id, dataset_id)
            assert ds is None or ds.state == "DELETING"
            log.info(
                f"Deleting: organization={organization_id} / dataset={dataset_id} ({db.count_child_nodes(organization_id, dataset_id)}) => {ds}"
            )
    else:
        log.info(f"Found {len(orphaned_dataset_ids)} orphaned dataset(s)")
        for dataset_id in orphaned_dataset_ids:
            ds = bf_database.get_dataset(organization_id, dataset_id)
            assert ds is None or ds.state == "DELETING"
            partitioned_db = PartitionedDatabase(
                db,
                OrganizationId(organization_id),
                DatasetId(dataset_id),
                UserNodeId("dataset-delete-migration"),
            )
            log.info(
                f"Deleting: organization={organization_id} / dataset={dataset_id} ({db.count_child_nodes(organization_id, dataset_id)}) => {ds}"
            )
            summary = completely_delete(partitioned_db)
            log.info(str(summary))

    log.info("Done")
Exemplo n.º 11
0
def migrate_dataset(
    organization_id: int,
    dataset_ids: Optional[List[int]] = None,
    remove_existing: bool = False,
    environment: str = "dev",
    jumpbox: Optional[str] = "non-prod",
    smoke_test: bool = True,
    remap_ids: bool = False,
):  # TODO does this need node IDs?

    if dataset_ids is None and remove_existing:
        raise Exception(
            f"Cannot remove existing data from Neo4j while migrating the entire organization {organization_id}"
        )
    elif dataset_ids is None and remap_ids:
        raise Exception(f"Can only remap IDs for a single dataset")
    elif dataset_ids is None:
        entire_organization = True
    else:
        entire_organization = False

    settings = SSMParameters(environment)

    with SSHTunnel(
            remote_host=settings.postgres_host,
            remote_port=settings.postgres_port,
            local_port=7777,
            jumpbox=jumpbox,
    ) as postgres_tunnel, SSHTunnel(
            remote_host=settings.neo4j_host,
            remote_port=settings.neo4j_port,
            local_port=8888,
            jumpbox=jumpbox,
    ) as neo4j_tunnel:

        engine, factory = migrate_db.get_postgres(
            f"postgresql://{settings.postgres_user}:{settings.postgres_password}@{postgres_tunnel.host}:{postgres_tunnel.port}/{settings.postgres_db}"
        )

        neo4j = Database(
            uri=f"bolt://{neo4j_tunnel.host}:{neo4j_tunnel.port}",
            user=settings.neo4j_user,
            password=settings.neo4j_password,
            max_connection_lifetime=300,
        )

        bf_database = PennsieveDatabase(engine, factory, organization_id)

        # 1) Get the target datasets for the migration
        if dataset_ids is None:
            dataset_ids = bf_database.get_dataset_ids(organization_id)

        for dataset_id in dataset_ids:
            print(f"Migrating dataset {dataset_id}")

            partitioned_db = PartitionedDatabase(
                neo4j,
                organization_id=organization_id,
                dataset_id=dataset_id,
                user_id=0)

            # 3) Lock dataset in Pennsieve DB
            bf_database.lock_dataset(organization_id, dataset_id)
            print(f"Got dataset {dataset_id}")

            try:
                # 4) Export data to S3 from Neptune
                export_from_neptune(
                    settings,
                    postgres_tunnel=postgres_tunnel,
                    organization_id=organization_id,
                    dataset_id=dataset_id,
                    jumpbox=jumpbox,
                    smoke_test=smoke_test,
                )

                # 5) Import into Neo4j from S3
                import_to_neo4j.load(
                    dataset=f"{organization_id}/{dataset_id}",
                    bucket=settings.export_bucket,
                    db=partitioned_db,
                    cutover=True,
                    remove_existing=remove_existing,
                    smoke_test=smoke_test,
                    remap_ids=remap_ids,
                )

            finally:
                # Whatever happens, unlock the dataset
                bf_database.unlock_dataset(organization_id, dataset_id)

        # 6) Sanity check that all datasets in the organization have been
        # migrated, then mark the organization as migrated.
        if entire_organization:
            print("Validating migration....")

            for dataset_id in bf_database.get_dataset_ids(organization_id):
                partitioned_db = PartitionedDatabase(
                    neo4j,
                    organization_id=organization_id,
                    dataset_id=dataset_id,
                    user_id=0,
                )

            neo4j.toggle_service_for_organization(
                organization_id=organization_id)

        print("Done.")
Exemplo n.º 12
0
def test_rewrite_ids_and_import(neo4j):
    """
    Test that UUIDs are remapped to the exact correct place with a manually
    defined remapping.
    """

    dataset_id = 60000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    REMAPPING = {
        "0b4b3615-9eaf-425d-9727-bcac29686fd5":
        "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        "7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17":
        "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        "e507b3ef-ade4-4672-83b4-f3f0774fb282":
        "cccccccc-cccc-cccc-cccc-cccccccccccc",
        "bf858cb5-ae51-4fcf-ad74-b1887946f70f":
        "dddddddd-dddd-dddd-dddd-dddddddddddd",
        "a99b09f5-caa6-4282-aa0e-cf56bde89254":
        "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
        "42fa4eb9-51cc-4c59-b550-ac24d6d5024a":
        "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "ecb71447-b684-c589-abda-b673c38edefc":
        "00000000-0000-0000-0000-000000000000",
        "e2b71447-e29d-11c3-24c6-f2ebffd1486a":
        "11111111-1111-1111-1111-111111111111",
        "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50":
        "22222222-2222-2222-2222-222222222222",
        "2e754729-684a-4c45-960f-348d68737d4d":
        "33333333-3333-3333-3333-333333333333",
        "175ff55b-b44d-4381-bd59-d4dbc0b9c5f0":
        "44444444-4444-4444-4444-444444444444",
        "ccf200d3-e77f-4d9e-bed3-f1f28860152f":
        "55555555-5555-5555-5555-555555555555",
        "443e141b-f59c-419f-82c1-eed97925b04d":
        "66666666-6666-6666-6666-666666666666",
        "d0b71de9-21f9-3557-edda-ad278dd81dc0":
        "77777777-7777-7777-7777-777777777777",
        "aeb7476e-55f6-7924-5e43-a83cfa7e4cef":
        "88888888-8888-8888-8888-888888888888",
        "fa3daedd-1761-4730-be7d-bb5de8e1261c":
        "99999999-9999-9999-9999-999999999999",
        "00b71de7-b42f-1fe9-a83f-824452fe966e":
        "aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
        "460591a0-8079-4979-a860-c3a4b18a32ad":
        "aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
    }

    def generate_new_id(old_id):
        new_id = REMAPPING.get(old_id, None)
        if new_id is None:
            return old_id
        return new_id

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
        generate_new_id=generate_new_id,
    )

    # Models

    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="cccccccc-cccc-cccc-cccc-cccccccccccc",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "dddddddd-dddd-dddd-dddd-dddddddddddd"
    assert bicycle.count == 1
    assert bicycle.template_id == None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2
    brand = properties[0]
    assert brand.name == "brand"
    assert brand.id == "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"])
    assert color.id == "ffffffff-ffff-ffff-ffff-ffffffffffff"

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("00000000-0000-0000-0000-000000000000"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5,
                                    tzinfo=pytz.UTC),
        },
    )
    bob = Record(
        id=UUID("11111111-1111-1111-1111-111111111111"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name":
            "Bob",
            # Embedded linked property
            "mother":
            RecordStub(id=UUID("00000000-0000-0000-0000-000000000000"),
                       title="Alice"),
        },
    )
    assert sorted(patients.results,
                  key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("22222222-2222-2222-2222-222222222222"),
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={
                "brand": "Bianchi",
                "color": ["red", "blue"]
            },
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="33333333-3333-3333-3333-333333333333",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="dddddddd-dddd-dddd-dddd-dddddddddddd",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   bicycle,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="44444444-4444-4444-4444-444444444444",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="dddddddd-dddd-dddd-dddd-dddddddddddd",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to

        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="55555555-5555-5555-5555-555555555555",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships

        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="77777777-7777-7777-7777-777777777777",
                from_="00000000-0000-0000-0000-000000000000",
                to="22222222-2222-2222-2222-222222222222",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="33333333-3333-3333-3333-333333333333",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "22222222-2222-2222-2222-222222222222", one_to_many=True)
        ) == [
            RecordRelationship(
                id="88888888-8888-8888-8888-888888888888",
                from_="22222222-2222-2222-2222-222222222222",
                to="11111111-1111-1111-1111-111111111111",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="44444444-4444-4444-4444-444444444444",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Linked properties

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=False)
        ) == [
            ModelRelationship(
                id="66666666-6666-6666-6666-666666666666",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="99999999-9999-9999-9999-999999999999",
                from_="11111111-1111-1111-1111-111111111111",
                to="00000000-0000-0000-0000-000000000000",
                type="MOTHER",
                model_relationship_id="66666666-6666-6666-6666-666666666666",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
                proxy_instance_id="aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
                package_id=184418,
                package_node_id=
                "N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
Exemplo n.º 13
0
def test_fix_multiple_mismatched_model_relationships(neo4j):
    """
    Case when a record relationship has a schemaRelationshipId that does not
    match the (from)-[relationship]->(to) triple of the relationship, but a
    compatible model relationship does exist in the dataset.

    In this case, an extra model relationship (id =
    `4ea7d9e6-b2ae-487c-981c-8202e37343f9`) connects
    (bike)-[rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c]->(bike) and the record
    relationship (person)-[rides]->(bike) incorrectly uses this
    schemaRelationshipId.

    AND

    Another record relationship, with the same relationship type, also claiming
    the same schemaRelationshipId, but actually connecting different models.

    This corrupted data is from old versions of the graph-ingest tool.
    """
    dataset_id = 50000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    with db.transaction() as tx:

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "ecb71447-b684-c589-abda-b673c38edefc")
        ) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        assert sorted(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True),
            key=lambda r: r.type,
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            ),
            RecordRelationship(
                id="83227fcd-ec4d-47e6-a3a9-4c9fbba4a2f3",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="92b6a32f-0597-4a7a-944b-27b39998283e",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            ),
        ]
Exemplo n.º 14
0
def test_fix_nonexistent_model_relationships(neo4j):
    """
    Case when a record relationship has a schemaRelationshipId that does not
    exist, but a model relationhip (d0b71de9-21f9-3557-edda-ad278dd81dc0) is
    defined that matches the (from)-[relationship]->(to) triple of the
    relationship.

    This corrupted data is from old versions of the graph-ingest tool.
    """

    dataset_id = 30000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    with db.transaction() as tx:

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "ecb71447-b684-c589-abda-b673c38edefc")
        ) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True)
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]
Exemplo n.º 15
0
def get_concept(db: PartitionedDatabase, concept_id_or_name: str) -> JsonDict:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        property_count = db.get_property_counts_tx(tx, [model.id])[model.id]
        return to_concept_dict(model, property_count)
Exemplo n.º 16
0
def get_proxy_relationship_counts(db: PartitionedDatabase, proxy_type: str,
                                  node_id: str) -> List[JsonDict]:
    return [
        count.to_dict()
        for count in db.get_proxy_relationship_counts(PackageNodeId(node_id))
    ]
Exemplo n.º 17
0
def delete_package_proxy(
    db: PartitionedDatabase, record_id: RecordId, package_id: PackageId
) -> JsonDict:
    return db.delete_package_proxy(record_id, package_id).to_dict()
Exemplo n.º 18
0
def delete_schema_linked_property(db: PartitionedDatabase, id_: str,
                                  link_id: ModelRelationshipId):
    # Ignore the ID of the model/concept, as it's not needed to actually
    # fetch the linked property:
    with db.transaction() as tx:
        return db.delete_model_relationship_tx(tx=tx, relationship=link_id)
Exemplo n.º 19
0
def get_record(db: PartitionedDatabase, record_id: RecordId, linked: bool) -> JsonDict:
    record = db.get_record(record_id, embed_linked=linked)
    if record is None:
        raise NotFound(f"Could not get record {record_id}")
    return record.to_dict()
Exemplo n.º 20
0
def get_all_schema_linked_properties(db: PartitionedDatabase):
    with db.transaction() as tx:
        return [
            to_schema_linked_property_target(relationship) for relationship in
            db.get_outgoing_model_relationships_tx(tx=tx, one_to_many=False)
        ]
Exemplo n.º 21
0
def delete_dataset(db: PartitionedDatabase,
                   batch_size: int = 1000,
                   duration: int = 5000) -> JsonDict:
    return db.delete_dataset(batch_size=batch_size,
                             duration=duration).to_dict()
Exemplo n.º 22
0
def get_relationship(db: PartitionedDatabase, record_id: RecordId,
                     relationship_id: RecordRelationshipId) -> JsonDict:
    with db.transaction() as tx:
        return db.get_outgoing_record_relationship_tx(
            tx, record_id, relationship_id).to_dict()
Exemplo n.º 23
0
def publish_dataset(
    db: PartitionedDatabase,
    s3,
    config: PublishConfig,
    file_manifests: List[FileManifest],
) -> List[FileManifest]:
    """
    Publish the dataset provided by the partitioned database view in `db`.

    Returns a list of file manifests for each file written to S3.
    """
    graph_manifests: List[FileManifest] = []

    with db.transaction() as tx:

        # 0) Get all existing proxy relationships. We'll need these in a few places.
        # ======================================================================
        proxies_by_relationship: Dict[
            RelationshipName,
            List[PackageProxyRelationship]] = defaultdict(list)
        for pp in package_proxy_relationships(db, tx, config, s3,
                                              file_manifests):
            proxies_by_relationship[pp.relationship].append(pp)

        # 1) Publish graph schema
        # ======================================================================
        schema, schema_manifest = publish_schema(
            db, tx, config, s3, file_manifests,
            list(proxies_by_relationship.keys()))
        graph_manifests.append(schema_manifest)

        # 2) Publish record CSVs
        # ======================================================================
        for m in schema.models:
            if m.model:
                graph_manifests.append(
                    publish_records_of_model(db, tx, m.model, config, s3))

        # 3) Publish source file CSV
        # ======================================================================
        if len(file_manifests) > 0:
            graph_manifests.append(
                publish_package_proxy_files(db, tx, config, s3,
                                            file_manifests))

        # 4) Publish relationship CSVs
        # ======================================================================

        # All relationships with the same name need to go in the same CSV file,
        # along with proxy relationships with the same name.
        relationships_by_name: Dict[
            RelationshipName, List[ModelRelationship]] = defaultdict(list)
        for r in schema.relationships:
            if not r.is_proxy_relationship():
                assert r.relationship is not None
                relationships_by_name[r.name].append(r.relationship)

        for relationship_name in set(
                chain(relationships_by_name.keys(),
                      proxies_by_relationship.keys())):
            graph_manifests.append(
                publish_relationships(
                    db,
                    tx,
                    relationship_name,
                    relationships_by_name[relationship_name],
                    proxies_by_relationship[relationship_name],
                    config,
                    s3,
                ))

    return graph_manifests
Exemplo n.º 24
0
)
parser.add_argument(
    "--s3-bucket",
    type=str,
    help="AWS S3 target bucket, either for embargoed or published datasets",
    action=env_action("S3_BUCKET"),
)

if __name__ == "__main__":
    configure_logging("INFO")

    args = parser.parse_args()
    db = PartitionedDatabase.get_from_env(
        organization_id=OrganizationId(args.organization_id),
        dataset_id=DatasetId(args.dataset_id),
        user_id=UserNodeId(args.user_node_id),
        organization_node_id=OrganizationNodeId(args.organization_node_id),
        dataset_node_id=DatasetNodeId(args.dataset_node_id),
    )

    s3 = boto3.client("s3", region_name="us-east-1")

    config = PublishConfig(s3_publish_key=args.s3_publish_key,
                           s3_bucket=args.s3_bucket)

    file_manifests = read_file_manifests(s3, config)

    graph_manifests = publish_dataset(db,
                                      s3,
                                      config,
                                      file_manifests=file_manifests)
Exemplo n.º 25
0
def get_all_models(db: PartitionedDatabase, ) -> List[JsonDict]:
    return [m.to_dict() for m in db.get_models()]
Exemplo n.º 26
0
def get_topology(db: PartitionedDatabase, id_: str) -> JsonDict:
    return [to_legacy_topology(t) for t in db.topology(id_)]
Exemplo n.º 27
0
def get_all_properties(db: PartitionedDatabase,
                       model_id_or_name: str) -> List[JsonDict]:
    return [p.to_dict() for p in db.get_properties(model_id_or_name)]
Exemplo n.º 28
0
def get_all_concepts(db: PartitionedDatabase) -> List[JsonDict]:
    with db.transaction() as tx:
        models = db.get_models_tx(tx)
        property_counts = db.get_property_counts_tx(
            tx, [model.id for model in models])
        return [to_concept_dict(m, property_counts[m.id]) for m in models]
Exemplo n.º 29
0
def get_graph_summary(db: PartitionedDatabase) -> JsonDict:
    return db.summarize().to_dict()
Exemplo n.º 30
0
def test_rewrite_ids_randomly_and_import(neo4j):
    """
    Test that UUIDs are remapped using the default random remapper.
    """
    dataset_id = 70000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
    )

    # Models

    patient = db.get_model("patient")
    assert patient is not None
    assert len(db.get_properties(patient)) == 2
    assert patient.id != UUID("0b4b3615-9eaf-425d-9727-bcac29686fd5")

    bicycle = db.get_model("bicycle")
    assert bicycle is not None
    assert len(db.get_properties(bicycle)) == 2
    assert bicycle.id != UUID("bf858cb5-ae51-4fcf-ad74-b1887946f70f")

    # Records
    patients = db.get_all_records("patient")

    alice = [r for r in patients if r.values["name"] == "Alice"][0]
    assert alice.id != "ecb71447-b684-c589-abda-b673c38edefc"
    bob = [r for r in patients if r.values["name"] == "Bob"][0]
    assert bob.id != UUID("e2b71447-e29d-11c3-24c6-f2ebffd1486a")

    assert len(db.get_all_records("bicycle").results) == 1
    bianchi = db.get_all_records("bicycle").results[0]
    assert bianchi.id != UUID("c8b71de8-cd9c-cc3f-67fe-4e30968d4e50")

    # Model relationships
    with db.transaction() as tx:
        assert (len(
            list(
                db.get_outgoing_model_relationships_tx(
                    tx, patient, one_to_many=True))) == 1)

        assert (len(
            list(
                db.get_outgoing_model_relationships_tx(
                    tx, bicycle, one_to_many=True))) == 1)

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to

        assert len(list(db.get_model_relationship_stubs_tx(tx))) == 1

        # Duplicate @RELATED_TO relationships are created
        assert len(list(db.get_model_relationships_tx(tx,
                                                      one_to_many=True))) == 2

        # Record relationships

        assert len(list(db.get_outgoing_record_relationships_tx(tx,
                                                                alice))) == 1

        assert (len(
            list(
                db.get_outgoing_record_relationships_tx(
                    tx, bianchi, one_to_many=True))) == 1)

        # Linked properties

        assert (len(
            list(
                db.get_outgoing_model_relationships_tx(
                    tx, patient, one_to_many=False))) == 1)

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(list(db.get_model_relationships_tx(tx,
                                                      one_to_many=False))) == 1

        assert (len(
            list(
                db.get_outgoing_record_relationships_tx(
                    tx, bob, one_to_many=False))) == 1)

    assert len(
        db.get_package_proxies_for_record(alice, limit=10, offset=0)[1]) == 1

    # Packages link directly to dataset node
    assert db.count_packages() == 1