Пример #1
0
def extra_additions_dataset(example_frame, example_readme):
    """Build a ``Dataset`` named "test_dataset" with all optional settings.

    The instance is created from ``example_frame`` with owner "me" and then
    gets a path column, an extra file, a column-name mapping, and a metadata
    column configured on it.

    Args:
        example_frame: Manifest handed to ``Dataset`` as its first argument.
        example_readme: Passed to the ``Dataset`` constructor and also
            registered as the only extra file.

    Returns:
        The configured ``Dataset`` instance.
    """
    path_column = "2dReadPath"

    dataset = Dataset(example_frame, "test_dataset", "me", example_readme)
    dataset.set_path_columns([path_column])
    dataset.set_extra_files([example_readme])
    # The path column is exposed under a different name in the package.
    dataset.set_column_names_map({path_column: "MappedPath"})
    dataset.set_metadata_columns(["Structure"])
    return dataset
Пример #2
0
def distribute_seg_dataset(
    test=False,
    csv_loc="../input_segs_and_tiffs/raw_seg_013_014_images.csv",
    col_name_map={
        "fov_path": "original_fov_location",
        "FOVId": "fov_id",
        "seg_file_name": "2D_fov_tiff_path",
    },
    dataset_name="2d_segmented_fields",
    package_owner="rorydm",
    s3_bucket="s3://allencell-internal-quilt",
):
    """Package the segmented-field manifest and push it to ``s3_bucket``.

    Args:
        test: When true, keep a fixed-seed sample of two manifest rows and
            append "_test" to the dataset name.
        csv_loc: Location of the manifest CSV to read.
        col_name_map: Column renames applied to the manifest after loading.
            The default dict is only read here, never modified.
        dataset_name: Name given to the created ``Dataset``.
        package_owner: Owner given to the created ``Dataset``.
        s3_bucket: Destination passed to ``Dataset.distribute``.

    Side effects:
        Writes "../structure_scores/structure_scores.csv" (the two plate
        score tables concatenated), runs ``git rev-parse HEAD`` in the
        current working directory, and distributes the package.
    """
    # Load the manifest and apply the column renames in one go.
    manifest = pd.read_csv(csv_loc).rename(col_name_map, axis="columns")

    # Keep only what survives validation, with a fresh 0..n-1 index.
    validated = validate(manifest, drop_on_error=True)
    manifest = validated.data.reset_index(drop=True)

    # A test package is a tiny reproducible sample under a distinct name.
    if test:
        manifest = manifest.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    package = Dataset(
        dataset=manifest,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="README.md",
    )

    # Merge the per-plate structure score tables into one auxiliary CSV,
    # normalising the "mh Score" header on the way.
    score_paths = []
    for plate_suffix in (13, 14):
        score_paths.append(
            Path(f"../structure_scores/structure_score_55000000{plate_suffix}.csv")
        )
    score_frames = []
    for score_path in score_paths:
        plate_scores = pd.read_csv(score_path)
        score_frames.append(
            plate_scores.rename({"mh Score": "mh score"}, axis="columns")
        )
    combined_scores = pd.concat(
        score_frames, axis="rows", ignore_index=True, sort=False
    )
    combined_scores.to_csv(Path("../structure_scores/structure_scores.csv"))

    # Declare which columns are metadata, which hold file paths, and which
    # additional files travel with the package.
    package.set_metadata_columns(["fov_id", "original_fov_location"])
    package.set_path_columns(["2D_fov_tiff_path"])
    package.set_extra_files(
        ["../channel_defs.json", "../structure_scores/structure_scores.csv"]
    )

    # Record the current git commit in the distribution message.
    head_bytes = subprocess.check_output(["git", "rev-parse", "HEAD"])
    commit_hash = head_bytes.strip().decode("utf-8")
    package.distribute(
        s3_bucket,
        message=f"git commit hash of fish_morphology_code = {commit_hash}",
    )
Пример #3
0
def distribute_scrnaseq_data(
    test=False,
    csv_loc="scrnaseq_data_raw.csv",
    dataset_name="scrnaseq_data",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):
    """Package the scRNA-seq manifest and push it to ``s3_bucket``.

    Args:
        test: When true, ``make_test_mtx`` is called and a four-row,
            counts-only manifest is packaged under "<dataset_name>_test"
            with no extra files. Otherwise the full manifest is packaged
            with both the "counts" and "anndata" path columns plus one
            extra RData file.
        csv_loc: Location of the manifest CSV; also forwarded to
            ``make_test_mtx`` in test mode.
        dataset_name: Name given to the created ``Dataset``.
        package_owner: Owner given to the created ``Dataset``.
        s3_bucket: Destination passed to ``Dataset.distribute``.

    Side effects:
        Runs ``git rev-parse HEAD`` in the current working directory and
        distributes the package.
    """
    manifest = pd.read_csv(csv_loc)

    if test:
        # Write the reduced matrix the test manifest points at.
        make_test_mtx(csv_loc=csv_loc)

        # Counts-only manifest: the test files plus the entry labelled 1 in
        # the original "counts" column.
        test_entries = [
            "raw_counts_test.mtx",
            manifest["counts"][1],
            "cells_test.csv",
            "cells_test.csv",
        ]
        manifest = pd.DataFrame({"counts": test_entries})
        dataset_name = f"{dataset_name}_test"

        path_columns = ["counts"]
        extra_files = []  # the test package ships no supplementary files
    else:
        path_columns = ["counts", "anndata"]
        extra_files = [
            "/allen/aics/gene-editing/RNA_seq/scRNAseq_SeeligCollaboration/2019_analysis/merged_experiment_1_2/scrnaseq_cardio_20191210.RData"
        ]

    # Both modes build the package the same way; only the inputs differ.
    package = Dataset(
        dataset=manifest,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="README.md",
    )
    package.set_path_columns(path_columns)
    if extra_files:
        package.set_extra_files(extra_files)

    # Record the current git commit in the distribution message.
    head_bytes = subprocess.check_output(["git", "rev-parse", "HEAD"])
    commit_hash = head_bytes.strip().decode("utf-8")
    package.distribute(
        s3_bucket,
        message=f"git commit hash of fish_morphology_code = {commit_hash}",
    )
Пример #4
0
def distribute_autocontrasted_dataset(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/normalized_2D_tiffs/5500000075_B3/image_manifest_final.csv",
    col_name_map={},
    dataset_name="2d_autocontrasted_fields_and_single_cells_actn2",
    package_owner="rorydm",
    s3_bucket="s3://allencell-internal-quilt",
):
    """Package the autocontrasted image manifest and push it to ``s3_bucket``.

    Args:
        test: When true, keep a fixed-seed sample of two manifest rows and
            append "_test" to the dataset name.
        csv_loc: Location of the manifest CSV to read.
        col_name_map: Column renames applied once to the manifest after
            loading. The default dict is only read here, never modified.
        dataset_name: Name given to the created ``Dataset``.
        package_owner: Owner given to the created ``Dataset``.
        s3_bucket: Destination passed to ``Dataset.distribute``.

    Raises:
        KeyError: From pandas, if the renamed manifest has no
            "2D_fov_tiff_path" column to drop.

    Side effects:
        Runs ``git rev-parse HEAD`` in the current working directory and
        distributes the package.
    """
    # README and extra files all live in this one directory.
    data_dir = (
        "/allen/aics/gene-editing/FISH/2019/chaos/data/"
        "normalized_2D_tiffs/5500000075_B3"
    )

    # read in original csv
    df = pd.read_csv(csv_loc)

    # Rename columns exactly once, then drop the un-rescaled tiff column.
    # Applying the same map a second time after the drop is a no-op for
    # ordinary maps and yields duplicate column names for chained maps
    # (A -> B, B -> C), so it is deliberately not repeated.
    df = df.rename(col_name_map, axis="columns")
    df = df.drop(["2D_fov_tiff_path"], axis="columns")

    # keep only what survives validation, with a fresh 0..n-1 index
    vds = validate(df, drop_on_error=True)
    df = vds.data.reset_index(drop=True)

    # subsample df for eg a test dataset
    if test:
        df = df.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    # create the dataset
    ds = Dataset(
        dataset=df,
        name=dataset_name,
        package_owner=package_owner,
        readme_path=f"{data_dir}/README.md",
    )

    # set data path cols, metadata cols, and extra files
    ds.set_metadata_columns(["fov_id", "original_fov_location"])
    ds.set_path_columns(
        ["rescaled_2D_fov_tiff_path", "rescaled_2D_single_cell_tiff_path"])
    ds.set_extra_files([
        f"{data_dir}/channel_defs.json",
        f"{data_dir}/parameters.json",
    ])

    # tag with commit hash
    label = (subprocess.check_output(["git", "rev-parse",
                                      "HEAD"]).strip().decode("utf-8"))
    ds.distribute(s3_bucket,
                  message=f"git commit hash of fish_morphology_code = {label}")
def distribute_nonstructure_dataset(
    test=False,
    csv_loc="nonstructure_fov_manifest_for_quilt.csv",
    col_name_map={
        "FOVId": "fov_id",
        "fov_path": "original_fov_location"
    },
    dataset_name="2d_nonstructure_fields",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):
    """Package the non-structure field manifest and push it to ``s3_bucket``.

    Args:
        test: When true, keep a fixed-seed sample of two manifest rows and
            append "_test" to the dataset name.
        csv_loc: Location of the manifest CSV to read.
        col_name_map: Column renames applied to the manifest after loading.
            The default dict is only read here, never modified.
        dataset_name: Name given to the created ``Dataset``.
        package_owner: Owner given to the created ``Dataset``.
        s3_bucket: Destination passed to ``Dataset.distribute``.

    Side effects:
        Runs ``git rev-parse HEAD`` in the current working directory and
        distributes the package.
    """
    # Load the manifest and apply the column renames in one go.
    manifest = pd.read_csv(csv_loc).rename(col_name_map, axis="columns")

    # Keep only what survives validation, with a fresh 0..n-1 index.
    validated = validate(manifest, drop_on_error=True)
    manifest = validated.data.reset_index(drop=True)

    # A test package is a tiny reproducible sample under a distinct name.
    if test:
        manifest = manifest.sample(2, random_state=0)
        dataset_name = f"{dataset_name}_test"

    package = Dataset(
        dataset=manifest,
        name=dataset_name,
        package_owner=package_owner,
        readme_path="README.md",
    )

    # Declare which columns are metadata, which hold file paths, and which
    # additional files travel with the package.
    package.set_metadata_columns(["fov_id", "original_fov_location"])
    package.set_path_columns(["merged_2D_fov_tiff_path"])
    package.set_extra_files(["channel_defs.json"])

    # Record the current git commit in the distribution message.
    head_bytes = subprocess.check_output(["git", "rev-parse", "HEAD"])
    commit_hash = head_bytes.strip().decode("utf-8")
    package.distribute(
        s3_bucket,
        message=f"git commit hash of fish_morphology_code = {commit_hash}",
    )
    # NOTE(review): fragment - the enclosing statement (presumably a loop
    # yielding `index` and `row` from `df`) starts outside the visible
    # source; `df` and `df_feat_inds` are bound there too - confirm.
    image_name = row['image_name']
    # Gather the distinct 'original_fov_location' values that df_feat_inds
    # holds for this image name and take one of them. Set ordering is
    # arbitrary, so the pick is only well-defined when exactly one distinct
    # value exists; an image name with no match raises IndexError.
    location = list(
        set(df_feat_inds.loc[df_feat_inds['image_name'] == image_name,
                             'original_fov_location']))[0]
    # Store the looked-up location in df at label `index`.
    df.loc[index, 'original_fov_location'] = location

# Join the plotting table with `df` on (FOV path, cell number).
# NOTE(review): no `how` is given; if plot_ds is a pandas DataFrame this is
# the default inner join, so unmatched rows are dropped - confirm intended.
plot_df = plot_ds.merge(
    right=df,
    left_on=['FOV path', 'Cell number'],
    right_on=['original_fov_location', 'napariCell_ObjectNumber'])

# Keep only the right-hand join keys and the two *_per_obj_median columns.
plot_df = plot_df[[
    'original_fov_location', 'napariCell_ObjectNumber',
    'seg_561_cell_dist_nuc_per_obj_median',
    'seg_638_cell_dist_nuc_per_obj_median'
]]

# Written to the working directory so it can be attached to the package
# below; the index is written too because `index=False` is not passed.
plot_df.to_csv('probe_localization_for_plot.csv')

# NOTE(review): test_df is not used anywhere in the visible code - the
# Dataset below is built from the full `df`; confirm that is intended.
test_df = df.loc[0:2]
ds = Dataset(
    dataset=df,
    name='probe_localization',
    package_owner='calystay',
    readme_path='C:/Users/calystay/Desktop/README.md',
)
# Attach the plot table written above as an extra file.
ds.set_extra_files(['probe_localization_for_plot.csv'])
ds.set_metadata_columns(["original_fov_location"])
# Push the package to the internal bucket with a fixed message.
ds.distribute("s3://allencell-internal-quilt",
              message="probe localization with original_fov_location")
Пример #7
0
# NOTE(review): fragment - `ds` and `scp_output_dir` are created earlier,
# outside the visible source.

# Add a license
ds.add_license("https://www.allencell.org/terms-of-use.html")

# Indicate column values to use for file metadata
ds.set_metadata_columns([
    "CellId", "CellIndex", "CellLine", "NucMembSegmentationAlgorithm",
    "NucMembSegmentationAlgorithmVersion", "FOVId", "Gene", "PlateId",
    "WellId", "ProteinDisplayName", "StructureDisplayName", "Workflow",
    "FeatureExplorerURL"
])

# Set produced package directory naming
# (maps each source column name to the name used in the package)
ds.set_column_names_map({
    "save_feats_path": "cell_features",
    "save_reg_path": "cell_images_3d",
    "save_reg_path_flat": "cell_images_2d",
    "save_reg_path_flat_proj": "cell_images_2d_projections"
})

# Add any extra files: every "diagnostics_*.png" found directly in
# scp_output_dir, grouped under the "contact_sheets" key
ds.set_extra_files(
    {"contact_sheets": list(scp_output_dir.glob("diagnostics_*.png"))})

# Step 6:
# Distribute the package
ds.distribute(push_uri="s3://quilt-aics",
              message="Add feature explorer links to metadata")

# Visual end-of-run marker on stdout
print("-" * 80)
print("COMPLETE")