def main():
    """Command-line entry point: build a dataset from parsed args and distribute it.

    Parses arguments via ``Args()``, constructs a ``Dataset``, applies whichever
    optional settings were provided (usage doc, license, metadata columns, path
    columns), and distributes the package to ``args.push_uri``.

    Any ``Exception`` raised along the way is logged and the process exits with
    status 1. The full traceback is only logged when ``args.debug`` is truthy.
    """
    # Bind ``args`` before the ``try`` so the exception handler can consult it
    # safely even when ``Args()`` itself is what raised; otherwise reading
    # ``args.debug`` would fail with UnboundLocalError and mask the real error.
    args = None
    try:
        args = Args()

        # Create dataset
        ds = Dataset(
            dataset=args.dataset_path,
            name=args.dataset_name,
            package_owner=args.package_owner,
            readme_path=args.readme_path,
        )

        # Handle optional provided
        if args.usage_doc_or_link:
            ds.add_usage_doc(args.usage_doc_or_link)
        if args.license_doc_or_link:
            ds.add_license(args.license_doc_or_link)
        if args.metadata_columns:
            ds.set_metadata_columns(args.metadata_columns)
        if args.path_columns:
            ds.set_path_columns(args.path_columns)

        # Distribute
        pkg = ds.distribute(push_uri=args.push_uri, message=args.message)
        log.info(
            f"Completed distribution. "
            f"Package [name: '{args.package_owner}/{args.dataset_name}', version: {pkg.top_hash}]"
        )

    except Exception as e:
        log.error("=============================================")
        # ``args`` is None when argument parsing failed; in that case the debug
        # preference is unknown, so only the short error message is logged.
        if args is not None and args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
def distribute_cellprofiler_features(
    test=False,
    csv_loc="/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/features2quilt.csv",
    dataset_name="2d_autocontrasted_single_cell_features_actn2_2",
    package_owner="tanyasg",
    s3_bucket="s3://allencell-internal-quilt",
):
    """Package the CellProfiler feature manifest as a dataset and push it.

    The manifest CSV at ``csv_loc`` is loaded and wrapped in a ``Dataset``,
    which is then distributed to ``s3_bucket``. The distribution message
    records the current git commit hash.

    Args:
        test: When truthy, ``make_test_csv`` is invoked on ``csv_loc`` and a
            single-row manifest pointing at the test files is distributed
            instead, under ``f"{dataset_name}_test"``.
        csv_loc: Path of the manifest CSV to read.
        dataset_name: Name given to the dataset/package.
        package_owner: Owner passed through to ``Dataset``.
        s3_bucket: URI passed as ``push_uri`` when distributing.
    """
    readme_location = "/allen/aics/gene-editing/FISH/2019/chaos/data/cp_20201022/merged_features/features2quilt/README.md"
    usage_link = "https://docs.quiltdata.com/walkthrough/reading-from-a-package"
    license_link = "https://www.allencell.org/terms-of-use.html"

    manifest = pd.read_csv(csv_loc)
    package_name = dataset_name

    if test:
        # Write the subsampled feature csv and image-count csv first, then
        # build a one-row manifest that reuses the first row's identifiers.
        make_test_csv(csv_loc=csv_loc)
        first_cell_line = manifest["cell_line"][0]
        first_cellprofiler_id = manifest["cellprofiler_id"][0]

        test_row = {
            "feature_file": ["cp_features_test.csv"],
            "image_object_count_file": ["image_object_counts_test.csv"],
            "cell_line": [first_cell_line],
            "cellprofiler_id": [first_cellprofiler_id],
        }
        manifest = pd.DataFrame(test_row)
        package_name = f"{dataset_name}_test"

    # Build the dataset object around the (possibly test) manifest
    package = Dataset(
        dataset=manifest,
        name=package_name,
        package_owner=package_owner,
        readme_path=readme_location,
    )

    # Attach usage documentation and license links
    package.add_usage_doc(usage_link)
    package.add_license(license_link)

    # Columns whose values are used as file metadata
    package.set_metadata_columns(["cell_line", "cellprofiler_id"])

    # Package-level column renames
    renames = {
        "feature_file": "features",
        "image_object_count_file": "object_counts",
    }
    package.set_column_names_map(renames)

    # Record the current git commit hash in the distribution message
    raw_hash = subprocess.check_output(["git", "rev-parse", "HEAD"])
    commit_hash = raw_hash.strip().decode("utf-8")
    push_message = f"git commit hash of fish_morphology_code = {commit_hash}"

    package.distribute(push_uri=s3_bucket, message=push_message)