Example #1

# Shared imports assumed by this and the snippets below (Dataset comes from
# quilt3distribute, as shown in Example #21; validate, used in later snippets,
# is assumed to come from quilt3distribute's validation helpers).
import subprocess
from pathlib import Path

import pandas as pd
from quilt3distribute import Dataset


def distribute_struct_scores_bonus(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/20200911_classifier_features_bonus/manifest_20201007_tg.csv",
    dataset_name="struct_scores_bonus",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/20200911_classifier_features_bonus/README.md",
    )

    # set data path cols, metadata cols, and extra files
    #     ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(["result_image_path"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #2
def distribute_scrnaseq_data(
    test=False,
    csv_loc="scrnaseq_data_raw.csv",
    dataset_name="scrnaseq_data",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):

    df = pd.read_csv(csv_loc)

    # subsample features to make test
    if test:
        # write test matrix
        make_test_mtx(csv_loc=csv_loc)

        # make test manifest; counts only; no anndata
        df = pd.DataFrame({
            "counts": [
                "raw_counts_test.mtx",
                df["counts"][1],
                "cells_test.csv",
                "cells_test.csv",
            ]
        })

        dataset_name = f"{dataset_name}_test"

        # create the dataset without supplementary files
        ds = Dataset(
            dataset=df,
            name=dataset_name,
            package_owner=package_owner,
            readme_path="README.md",
        )

        # columns with files to upload
        ds.set_path_columns(["counts"])

    else:
        ds = Dataset(
            dataset=df,
            name=dataset_name,
            package_owner=package_owner,
            readme_path="README.md",
        )

        # columns with files to upload
        ds.set_path_columns(["counts", "anndata"])

        # R data file (.RData) as a supplementary file
        ds.set_extra_files([
            "/allen/aics/gene-editing/RNA_seq/scRNAseq_SeeligCollaboration/2019_analysis/merged_experiment_1_2/scrnaseq_cardio_20191210.RData"
        ])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #3
def distribute_seg_dataset(
    test=False,
    csv_loc="../input_segs_and_tiffs/raw_seg_013_014_images.csv",
    col_name_map={
        "fov_path": "original_fov_location",
        "FOVId": "fov_id",
        "seg_file_name": "2D_fov_tiff_path",
    },
    dataset_name="2d_segmented_fields",
    package_owner="rorydm",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)

    # rename some cols
    df = df.rename(col_name_map, axis="columns")

    # drop any rows with missing data
    vds = validate(df, drop_on_error=True)
    df = vds.data.reset_index(drop=True)

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="README.md",
    )

    # structure scores as auxiliary file
    score_files = [
        Path(f"../structure_scores/structure_score_55000000{p}.csv")
        for p in (13, 14)
    ]
    score_dfs = [
        pd.read_csv(f).rename({"mh Score": "mh score"}, axis="columns")
        for f in score_files
    ]
    df_score = pd.concat(score_dfs, axis="rows", ignore_index=True, sort=False)
    # index=False avoids writing an "Unnamed: 0" index column
    df_score.to_csv(Path("../structure_scores/structure_scores.csv"), index=False)

    # set data path cols, metadata cols, and extra files
    ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(["2D_fov_tiff_path"])
    ds.set_extra_files(
        ["../channel_defs.json", "../structure_scores/structure_scores.csv"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #4
def test_dataset_metadata_numpy_type_casting(example_frame, example_readme):
    # Add numpy column to frame
    example_frame["NumpyTypes"] = np.zeros(9)
    ds = Dataset(example_frame, "test_dataset", "me", example_readme)

    # Add column filled with numpy types to index
    ds.set_metadata_columns(["NumpyTypes"])

    # Just run distribute to make sure that numpy types are cast fine
    ds.distribute()
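
# Why the cast matters (illustrative sketch): package metadata is JSON
# serialized, and the stdlib json module rejects bare numpy scalars such as
# np.int64, so Dataset casts values to native Python types first.
import json

import numpy as np

json.dumps({"value": int(np.int64(1))})  # fine once cast to a native int
# json.dumps({"value": np.int64(1)})  # TypeError: Object of type int64 is not JSON serializable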
Example #5
def distribute_cellprofiler_features(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/features2quilt.csv",
    dataset_name="2d_autocontrasted_single_cell_features_actn2_2",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):
    df = pd.read_csv(csv_loc)

    # subsample features to make test
    if test:
        # write test feature csv and test image counts csv
        make_test_csv(csv_loc=csv_loc)
        cell_line = df["cell_line"][0]
        cellprofiler_id = df["cellprofiler_id"][0]

        # make test manifest
        df = pd.DataFrame({
            "feature_file": ["cp_features_test.csv"],
            "image_object_count_file": ["image_object_counts_test.csv"],
            "cell_line": [cell_line],
            "cellprofiler_id": [cellprofiler_id],
        })

        dataset_name = f"{dataset_name}_test"

    # Create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/README.md",
    )

    # Optionally add common additional requirements
    ds.add_usage_doc(
        "https://docs.quiltdata.com/walkthrough/reading-from-a-package")
    ds.add_license("https://www.allencell.org/terms-of-use.html")

    # Optionally indicate column values to use for file metadata
    ds.set_metadata_columns(["cell_line", "cellprofiler_id"])

    # Optionally rename the columns on the package level
    ds.set_column_names_map({
        "feature_file": "features",
        "image_object_count_file": "object_counts"
    })

    # add commit hash to message
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    # Distribute
    ds.distribute(push_uri=s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #6
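# SomeDummyObject is defined in the test fixtures; a minimal stand-in that
# would trigger the same serialization failure looks like this:
class SomeDummyObject:
    def __init__(self, value):
        self.value = value

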
def test_dataset_metadata_non_json_serializable_type(example_frame,
                                                     example_readme):
    # Add non json serializable type to dataframe
    example_frame["BadType"] = [SomeDummyObject(i) for i in range(9)]
    ds = Dataset(example_frame, "test_dataset", "me", example_readme)

    # Add column filled with non serializable type to index
    ds.set_metadata_columns(["BadType"])

    # Check non json serializable type check fails
    with pytest.raises(TypeError):
        ds.distribute()
Example #7

def distribute_struct_scores_actn2_live(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/20201012_actn2_live_classifier_with_metadata/live_manifest.csv",
    dataset_name="struct_scores_actn2_live",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)
    df["CellPath_x"] = df["CellPath_x"].str.replace(
        "singlecells",
        "/allen/aics/assay-dev/computational/data/cardio_pipeline_datastep/local_staging_pipeline_actn2/singlecells/singlecells",
        regex=False,
    )
    df = df.drop(columns=[
        "BackgroundPath",
        "ClassificationPath",
        "MemMaxProjectionPath",
        "MemSegmentationPath",
        "NucMaxProjectionPath",
        "StrMaxIntensitySlicePath",
        "CellPath_y",
        "path",
        "image_name",
        "cell_id_filename",
    ])

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/20200929_classifier_features_actn2/README_actn2_live.md",
    )

    # set data path cols, metadata cols, and extra files
    # ds.set_metadata_columns(["RawFilePath", "BackgroundPath", "ClassificationPath", "MemMaxProjectionPath", "MemSegmentationPath", "NucMaxProjectionPath", "StrMaxIntensitySlicePath"])
    ds.set_path_columns(["CellPath_x"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #8
def distribute_autocontrasted_dataset(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/normalized_2D_tiffs/5500000075_B3/image_manifest_final.csv",
    col_name_map={},
    dataset_name="2d_autocontrasted_fields_and_single_cells_actn2",
    package_owner="rorydm",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)

    # rename some cols and drop the raw (non-autocontrasted) tiff path
    df = df.rename(col_name_map, axis="columns").drop(
        ["2D_fov_tiff_path"], axis="columns"
    )

    # drop any cols with missing data
    vds = validate(df, drop_on_error=True)
    df = vds.data.reset_index(drop=True)

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/normalized_2D_tiffs/5500000075_B3/README.md",
    )

    # set data path cols, metadata cols, and extra files
    ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(
        ["rescaled_2D_fov_tiff_path", "rescaled_2D_single_cell_tiff_path"])
    ds.set_extra_files([
        "/allen/aics/gene-editing/FISH/2019/chaos/data/normalized_2D_tiffs/5500000075_B3/channel_defs.json",
        "/allen/aics/gene-editing/FISH/2019/chaos/data/normalized_2D_tiffs/5500000075_B3/parameters.json",
    ])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #9

def main():
    # define args before the try block so the except handler can safely
    # reference args.debug even if Args() itself fails
    args = None
    try:
        args = Args()

        # Create dataset
        ds = Dataset(dataset=args.dataset_path,
                     name=args.dataset_name,
                     package_owner=args.package_owner,
                     readme_path=args.readme_path)

        # Handle optional provided
        if args.usage_doc_or_link:
            ds.add_usage_doc(args.usage_doc_or_link)
        if args.license_doc_or_link:
            ds.add_license(args.license_doc_or_link)
        if args.metadata_columns:
            ds.set_metadata_columns(args.metadata_columns)
        if args.path_columns:
            ds.set_path_columns(args.path_columns)

        # Distribute
        pkg = ds.distribute(push_uri=args.push_uri, message=args.message)
        log.info(
            f"Completed distribution. "
            f"Package [name: '{args.package_owner}/{args.dataset_name}', version: {pkg.top_hash}]"
        )

    except Exception as e:
        log.error("=============================================")
        if args is not None and args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
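
# Args is defined elsewhere in this module; a minimal argparse-backed sketch
# covering the attributes main() reads (argument names inferred from the
# usage above):
import argparse


class Args(argparse.Namespace):
    def __init__(self):
        super().__init__()
        p = argparse.ArgumentParser(prog="dataset_distribute")
        p.add_argument("dataset_path")
        p.add_argument("dataset_name")
        p.add_argument("package_owner")
        p.add_argument("readme_path")
        p.add_argument("push_uri")
        p.add_argument("--message", default=None)
        p.add_argument("--usage-doc-or-link", dest="usage_doc_or_link")
        p.add_argument("--license-doc-or-link", dest="license_doc_or_link")
        p.add_argument("--metadata-columns", dest="metadata_columns", nargs="*")
        p.add_argument("--path-columns", dest="path_columns", nargs="*")
        p.add_argument("--debug", action="store_true")
        p.parse_args(namespace=self)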
Example #10
def test_dataset_auto_metadata_grouping_repeated_values(
        repeated_values_frame, example_readme):
    """
    Because the repeated values dataset has three unique files but has nine rows of data, this function
    checks that there are only three files passed to the package object but that each file has a list of the unique
    CellIds but that because all the structures are the same per file, that the structure has been reduced to a single
    value.
    """
    # Create dataset from frame
    ds = Dataset(repeated_values_frame, "test_dataset", "me", example_readme)
    ds.set_metadata_columns(["CellId", "Structure"])

    # Generate package
    pkg = ds.distribute()

    # Check file groupings available
    assert set(pkg.keys()) == {
        "SourceReadPath", "README.md", "metadata.csv", "referenced_files"
    }

    # Check that only three tiffs were attached to package
    assert len(pkg["SourceReadPath"]) == 3

    # Check that CellId is a list because of repeated values, but Structure is
    # a string because it is constant within each file
    for f in pkg["SourceReadPath"]:
        assert isinstance(pkg["SourceReadPath"][f].meta["CellId"], list)
        assert isinstance(pkg["SourceReadPath"][f].meta["Structure"], str)
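
# repeated_values_frame is a pytest fixture defined elsewhere; a minimal
# stand-in consistent with the docstring above (three unique files across
# nine rows, one structure per file, distinct CellIds) might look like:
import pandas as pd

repeated_values_frame = pd.DataFrame({
    "SourceReadPath": [f"file_{i % 3}.tiff" for i in range(9)],
    "Structure": [f"structure_{i % 3}" for i in range(9)],
    "CellId": list(range(9)),
})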
Example #11
def distribute_struct_scores_actn2(
    test=False,
    csv_loc="/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/results_Fish/AssayDevFishAnalsysis-Handoff-transcript2protein.csv",
    dataset_name="struct_scores_actn2_2",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)

    # only include new actn2 fish in this package -> 5500000322/323 imaged 2020-10
    date = df["original_fov_location"].str.split("/", expand=True)
    df["date"] = date[7]
    df = df[df.date.isin(["20201002", "20201006"])]
    df = df.drop(columns=["date"])

    # update result image dir (moved after processing)
    img_dir = "/allen/aics/assay-dev/MicroscopyOtherData/Viana/projects/fish_morphology_code/fish_morphology_code/processing/structure_organization/output_Fish/"
    new_result_path = [
        img_dir + Path(x).name for x in df["result_image_path"].tolist()
    ]
    df["result_image_path"] = new_result_path

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="/allen/aics/gene-editing/FISH/2019/chaos/data/20200929_classifier_features_actn2/README.md",
    )

    # set data path cols, metadata cols, and extra files
    #     ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(["result_image_path"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #12

def distribute_nonstructure_dataset(
    test=False,
    csv_loc="nonstructure_fov_manifest_for_quilt.csv",
    col_name_map={
        "FOVId": "fov_id",
        "fov_path": "original_fov_location"
    },
    dataset_name="2d_nonstructure_fields",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):

    # read in original csv
    df = pd.read_csv(csv_loc)

    # rename some cols
    df = df.rename(col_name_map, axis="columns")

    # drop any rows with missing data
    vds = validate(df, drop_on_error=True)
    df = vds.data.reset_index(drop=True)

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="README.md",
    )

    # set data path cols, metadata cols, and extra files
    ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(["merged_2D_fov_tiff_path"])
    ds.set_extra_files(["channel_defs.json"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #13
def distribute_nuclear_masks(
    test=False,
    csv_loc=Path(
        "/allen/aics/microscopy/Calysta/test/fish_struc_seg/sarc_classification_for_Rory.csv"
    ),
    dataset_name="2d_nuclear_masks",
    package_owner="calystay",
    s3_bucket="s3://allencell-internal-quilt",
    readme_path="README.md",
):

    # read in original csv
    df_in = pd.read_csv(csv_loc)

    # extract original_fov_location and nuc_mask_path from dataframe
    df = df_in[["original_fov_location", "nuc_mask_path"]]
    df = df.drop_duplicates()

    # drop any rows with missing data
    vds = validate(df, drop_on_error=True)
    df = vds.data.reset_index(drop=True)

    # subsample df, e.g., for a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path=readme_path,
    )

    # set data path cols and metadata cols; column names must match the two
    # columns kept above
    ds.set_metadata_columns(["original_fov_location"])
    ds.set_path_columns(["nuc_mask_path"])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
Example #14
def test_dataset_file_grouping_with_matching_names(same_filenames_frame,
                                                   example_readme):
    # Create dataset from frame
    ds = Dataset(same_filenames_frame, "test_dataset", "me", example_readme)

    # Generate package
    pkg = ds.distribute()

    # Check file groupings available
    assert set(pkg.keys()) == {
        "SourceReadPath", "README.md", "metadata.csv", "referenced_files"
    }

    # Check that 18 unique files were attached to package
    assert len(pkg["SourceReadPath"]) == 18
Example #15

# Indicate column values to use for file metadata
ds.set_metadata_columns([
    "CellId", "CellIndex", "CellLine", "NucMembSegmentationAlgorithm",
    "NucMembSegmentationAlgorithmVersion", "FOVId", "Gene", "PlateId", "WellId",
    "ProteinDisplayName", "StructureDisplayName", "Workflow", "FeatureExplorerURL"
])

# Set produced package directory naming
ds.set_column_names_map({
    "save_feats_path": "cell_features",
    "save_reg_path": "cell_images_3d",
    "save_reg_path_flat": "cell_images_2d",
    "save_reg_path_flat_proj": "cell_images_2d_projections"
})

# Add any extra files
ds.set_extra_files({
    "contact_sheets": list(scp_output_dir.glob("diagnostics_*.png"))
})

# Step 6:
# Distribute the package
ds.distribute(
    push_uri="s3://allencell",
    message="Statistical Integrated Cell Research Data including Controls"
)

print("-" * 80)
print("COMPLETE")
Example #16
    "NucMembSegmentationAlgorithmVersion", "FOVId", "Gene", "PlateId",
    "WellId", "ProteinDisplayName", "StructureDisplayName", "Workflow",
    "FeatureExplorerURL"
])

# Set produced package directory naming
ds.set_column_names_map({
    "MembraneContourReadPath": "membrane_contours",
    "MembraneSegmentationReadPath": "membrane_segmentations",
    "NucleusContourReadPath": "dna_contours",
    "NucleusSegmentationReadPath": "dna_segmentations",
    "SourceReadPath": "fovs",
    "StructureContourReadPath": "structure_contours",
    "StructureSegmentationReadPath": "structure_segmentations"
})

# Step 6:
# Distribute the package
ds.distribute(push_uri="s3://quilt-aics",
              message="Add feature explorer links to metadata")

print("-" * 80)
print("COMPLETE")
Example #17
        "### Global structure organization and local structural alignment features\n\n"
    )
    for meta in metadata:
        for key, value in meta.items():
            ftxt.write("- `{0}`: {1}\n".format(
                value["name"] if value["name"] is not None else key,
                value["description"],
            ))

# Checking expected shape of the dataframe
assert df.shape == (5161, 25)

# Save a hand off version for the Modeling team
df.to_csv("../results/AssayDevFishAnalsysis-Handoff.csv")

# Upload to Quilt
ds = Dataset(
    dataset="../results/AssayDevFishAnalsysis-Handoff.csv",
    name="assay_dev_fish_analysis",
    package_owner="matheus",
    readme_path="assay-dev-fish.md",
)

# Set metadata and path columns
ds.set_metadata_columns(["CellId"])
ds.set_path_columns(["result_image_path"])

# Send to Quilt
pkg = ds.distribute(push_uri="s3://allencell-internal-quilt",
                    message="Fish dataset by assay-dev")
Example #18
    "WellId", "ProteinDisplayName", "StructureDisplayName", "Workflow",
    "FeatureExplorerURL"
])

# Set produced package directory naming
ds.set_column_names_map({
    "MembraneContourReadPath": "membrane_contours",
    "MembraneSegmentationReadPath": "membrane_segmentations",
    "NucleusContourReadPath": "dna_contours",
    "NucleusSegmentationReadPath": "dna_segmentations",
    "SourceReadPath": "fovs",
    "StructureContourReadPath": "structure_contours",
    "StructureSegmentationReadPath": "structure_segmentations"
})

# Step 6:
# Distribute the package
ds.distribute(
    push_uri="s3://allencell",
    message="Update feature explorer links and documentation for new bucket")

print("-" * 80)
print("COMPLETE")
        "tanyasg/2d_autocontrasted_single_cell_features",
        "s3://allencell-internal-quilt",
    )
df_feat_inds = p_feats["features"]["a749d0e2_cp_features.csv"]()[["fov_path"]].rename(columns={"fov_path":"original_fov_location"})
df_feat_inds = df_feat_inds.drop_duplicates()

for index, row in df_feat_inds.iterrows():
    df_feat_inds.loc[index, 'original_fov_name'] = row['original_fov_location'].split('/')[-1]

# look up each fov location by its fov name (the manifest has no 'file_name'
# column; 'original_fov_name', computed above, is the shared key)
for index, row in df.iterrows():
    df.loc[index, 'original_fov_location'] = df_feat_inds.loc[
        df_feat_inds['original_fov_name'] == row['original_fov_name'],
        'original_fov_location'].values.tolist()[0]

# merge df
df_new = df.merge(df_feat_inds, how='inner', on=['original_fov_name'])
df_new = df_new.set_index('index')

# Upload to quilt
test_df = df_new[0:2]
ds = Dataset(
    dataset=df_new,
    name='3d_actn2_segmentation',
    package_owner='calystay',
    readme_path=r'C:\Users\calystay\Desktop\README.md',
)
ds.set_metadata_columns(["original_fov_location"])
ds.set_path_columns(["struc_seg_path"])
ds.distribute(
    "s3://allencell-internal-quilt",
    message="3D actn2 segmentation with original_fov_location",
)

Example #20

# (the snippet begins mid-loop; the enclosing iteration over df's rows is
# inferred from the loop body below)
for index, row in df.iterrows():
    image_name = row['image_name']
    location = list(
        set(df_feat_inds.loc[df_feat_inds['image_name'] == image_name,
                             'original_fov_location']))[0]
    df.loc[index, 'original_fov_location'] = location

plot_df = plot_ds.merge(
    right=df,
    left_on=['FOV path', 'Cell number'],
    right_on=['original_fov_location', 'napariCell_ObjectNumber'])

plot_df = plot_df[[
    'original_fov_location', 'napariCell_ObjectNumber',
    'seg_561_cell_dist_nuc_per_obj_median',
    'seg_638_cell_dist_nuc_per_obj_median'
]]

plot_df.to_csv('probe_localization_for_plot.csv')

test_df = df.loc[0:2]
ds = Dataset(
    dataset=df,
    name='probe_localization',
    package_owner='calystay',
    readme_path='C:/Users/calystay/Desktop/README.md',
)
ds.set_extra_files(['probe_localization_for_plot.csv'])
ds.set_metadata_columns(["original_fov_location"])
ds.distribute("s3://allencell-internal-quilt",
              message="probe localization with original_fov_location")
Example #21
import pandas as pd
from quilt3distribute import Dataset

df = pd.read_csv(
    '/allen/aics/microscopy/Calysta/test/fish_struc_seg/sarc_classification_for_Rory.csv'
)
df = df.drop(['Unnamed: 0'], axis=1)
df = df.drop(['Unnamed: 0.1'], axis=1)

df = df.rename(columns={
    'fov_path': 'original_fov_location',
    'cell_num': 'napariCell_ObjectNumber'
})

df = df[['nuc_mask_path', 'original_fov_location']]
df = df.drop_duplicates()

test_df = df.loc[0:2]
ds = Dataset(
    dataset=test_df,
    name='2d_nuclear_masks_test',
    package_owner='calystay',
    readme_path=r'C:\Users\calystay\Desktop\README.md',
)
ds.set_metadata_columns(["original_fov_location"])
ds.set_path_columns(["nuc_mask_path"])
ds.distribute("s3://allencell-internal-quilt",
              message="2D nuclear masks with original_fov_location")