def lambda_handler(event, context):
    # Log the received event
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name
    )
    num_series = int(metadata["barcoding_rows"]) * int(metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])

    # Get the list of images in this experiment - this can take a long time
    # for big experiments so let's add some prints
    print("Getting the list of images")
    image_list_prefix = (
        image_prefix + batch + "/images/"
    )  # the slash here is critical, because we don't want to read images_corrected because it's huge
    image_list = helpful_functions.paginate_a_folder(s3, bucket, image_list_prefix)
    print("Image list retrieved")
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in="10X", filter_out="copy"
    )
    metadata["barcoding_file_data"] = image_dict
    print("Parsing the image list")
    # We've saved the previous for looking at/debugging later, but really all
    # we want is the wells with all cycles present
    if metadata["one_or_many_files"] == 1:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict, expected_cycles, metadata["one_or_many_files"]
        )
    else:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict,
            expected_cycles,
            metadata["one_or_many_files"],
            files_per_well=num_series,
        )
    metadata["wells_with_all_cycles"] = parsed_image_dict
    helpful_functions.write_metadata_file(
        s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name
    )

    # Pull the file names we care about, and make the CSV
    print("Making the CSVs")
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = list(platedict.keys())
        bucket_folder = (
            "/home/ubuntu/bucket/" + image_prefix + batch + "/images/" + eachplate
        )
        per_plate_csv = create_CSVs.create_CSV_pipeline5(
            eachplate,
            num_series,
            expected_cycles,
            bucket_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["fast_or_slow_mode"],
        )
        csv_on_bucket_name = (
            prefix
            + "load_data_csv/"
            + batch
            + "/"
            + eachplate
            + "/load_data_pipeline5.csv"
        )
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    # Now it's time to run DCP
    # Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    # run_DCP.grab_batch_config(bucket,prefix,batch,step)

    # Make a batch
    create_batch_jobs.create_batch_jobs_5(
        image_prefix, batch, pipeline_name, platelist, expected_cycles, app_name
    )

    # Start a cluster
    run_DCP.run_cluster(
        bucket, prefix, batch, step, fleet_file_name, len(platelist) * expected_cycles
    )

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
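# ---------------------------------------------------------------------------
# Hedged sketch (not part of the handler above): illustrates how the trigger
# object key is decomposed into prefix / image_prefix / batch by the string
# splits at the top of lambda_handler. The key and pipeline name below are
# invented placeholder values, not taken from a real experiment.
# ---------------------------------------------------------------------------
def _demo_key_parsing(
    key="example_project/workspace/pipelines/20210101_Batch1/5_BC_Illum.cppipe",
    pipeline_name="5_BC_Illum.cppipe",
):
    """Return (prefix, image_prefix, batch) for a hypothetical S3 object key."""
    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]  # drop the trailing "/"
    # For the defaults above this returns:
    #   ("example_project/workspace/", "example_project/", "20210101_Batch1")
    return prefix, image_prefix, batch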
def lambda_handler(event, context):
    # Log the received event
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch, 'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['painting_rows']) * int(metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Get the list of images in this experiment
    image_list_prefix = image_prefix + batch + '/images/'
    image_list = helpful_functions.paginate_a_folder(s3, bucket, image_list_prefix)
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in='20X', filter_out='copy')
    metadata['painting_file_data'] = image_dict
    helpful_functions.write_metadata_file(
        s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name)

    # How many files per well indicates the well has all images present
    if metadata['one_or_many_files'] == 'one':
        full_well_files = 1
    else:
        full_well_files = num_series

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        # dict views aren't indexable in Python 3, so cast to a list first
        paint_cycle_name = list(platedict[well_list[0]].keys())[0]
        per_well_im_list = []
        for eachwell in well_list:
            per_well = platedict[eachwell][paint_cycle_name]
            per_well.sort()
            if len(per_well) == full_well_files:
                per_well_im_list.append(per_well)
        bucket_folder = ('/home/ubuntu/bucket/' + image_prefix + batch +
                         '/images/' + eachplate + '/' + paint_cycle_name)
        per_plate_csv = create_CSVs.create_CSV_pipeline1(
            eachplate, num_series, bucket_folder, per_well_im_list,
            metadata['one_or_many_files'])
        csv_on_bucket_name = (prefix + 'load_data_csv/' + batch + '/' +
                              eachplate + '/load_data_pipeline1.csv')
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    # Now it's time to run DCP
    # Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    # run_DCP.grab_batch_config(bucket,prefix,batch,step)

    # Make a batch
    create_batch_jobs.create_batch_jobs_1(image_prefix, batch, pipeline_name,
                                          platelist, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist))

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print('Go run the monitor now')
def lambda_handler(event, context):
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]

    if "csv" in key:
        plate = key.split("/")[-2].split("-")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]
    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]
    prefix = os.path.join(image_prefix, "workspace/")
    print(batch, prefix)

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name
    )
    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = list(range(0, num_series, range_skip))
    expected_files_per_well = (num_series * int(metadata["painting_channels"])) + 6

    platelist = list(image_dict.keys())
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata["painting_plate_and_well_list"] = plate_and_well_list
    helpful_functions.write_metadata_file(
        s3, bucket_name, metadata, metadata_file_name, metadata_on_bucket_name
    )

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")
    filter_prefix = image_prefix + batch + "/images_corrected/painting"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = ((len(plate_and_well_list) - 1) * expected_files_per_well) + 1
    print("Checking if all files are present")
    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        print("Checking CSVs for thresholds")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3,
            bucket_name,
            os.path.join(image_prefix, batch, "images_corrected/painting"),
        )
        image_csv_list = [x for x in image_csv_list if "Image.csv" in x]
        image_df = helpful_functions.concat_some_csvs(
            s3, bucket_name, image_csv_list, "Image.csv"
        )
        threshes = image_df["Threshold_FinalThreshold_Cells"]
        calc_upper_percentile = numpy.percentile(threshes, upper_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {upper_percentile} percentile was",
            calc_upper_percentile,
        )
        calc_lower_percentile = numpy.percentile(threshes, lower_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {lower_percentile} percentile was",
            calc_lower_percentile,
        )
        pipeline_on_bucket_name = os.path.join(prefix, "pipelines", batch, pipeline_name)
        local_pipeline_name = os.path.join("/tmp", pipeline_name)
        local_temp_pipeline_name = os.path.join(
            "/tmp", pipeline_name.split(".")[0] + "_edited.cppipe"
        )
        with open(local_pipeline_name, "wb") as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(
            local_pipeline_name,
            local_temp_pipeline_name,
            calc_lower_percentile,
            calc_upper_percentile,
        )
        with open(local_temp_pipeline_name, "rb") as pipeline:
            s3.put_object(Body=pipeline, Bucket=bucket_name, Key=pipeline_on_bucket_name)
        print("Edited pipeline file")

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict.keys())
            bucket_folder = (
                "/home/ubuntu/bucket/"
                + image_prefix
                + batch
                + "/images_corrected/painting"
            )
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip
            )
            csv_on_bucket_name = (
                prefix
                + "load_data_csv/"
                + batch
                + "/"
                + eachplate
                + "/load_data_pipeline3.csv"
            )
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

        # Now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # Make the jobs
        create_batch_jobs.create_batch_jobs_3(
            image_prefix, batch, pipeline_name, plate_and_well_list, out_range, app_name
        )

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(plate_and_well_list) * len(out_range),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
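# ---------------------------------------------------------------------------
# Hedged sketch (assumed values, not real measurements): the handler above
# edits the CellProfiler pipeline using numpy.percentile over the
# Threshold_FinalThreshold_Cells column of the concatenated Image.csv files.
# This shows that arithmetic in isolation; lower_percentile / upper_percentile
# mirror the module-level settings the handler assumes.
# ---------------------------------------------------------------------------
def _demo_threshold_percentiles(lower_percentile=10, upper_percentile=90):
    import numpy
    # Hypothetical per-image final cell thresholds pulled from Image.csv files.
    threshes = [0.0021, 0.0025, 0.0030, 0.0044, 0.0061]
    calc_lower = numpy.percentile(threshes, lower_percentile)
    calc_upper = numpy.percentile(threshes, upper_percentile)
    # These two values are what edit_id_secondary writes back into the pipeline.
    return calc_lower, calc_upper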
def lambda_handler(event, context):
    # Log the received event
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    prefix, batchAndPipe = key.split('pipelines/')
    image_prefix = prefix.split('workspace')[0]
    batch = batchAndPipe.split(pipeline_name)[0][:-1]

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch, 'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name)
    num_series = int(metadata['barcoding_rows']) * int(metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])

    # Get the list of images in this experiment - this can take a long time
    # for big experiments so let's add some prints
    print('Getting the list of images')
    # The slash here is critical, because we don't want to read
    # images_corrected because it's huge
    image_list_prefix = image_prefix + batch + '/images/'
    image_list = helpful_functions.paginate_a_folder(s3, bucket, image_list_prefix)
    print('Image list retrieved')
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in='10X', filter_out='copy')
    metadata['barcoding_file_data'] = image_dict
    print('Parsing the image list')
    # We've saved the previous for looking at/debugging later, but really all
    # we want is the wells with all cycles present
    if metadata['one_or_many_files'] == 1:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict, expected_cycles, metadata['one_or_many_files'])
    else:
        parsed_image_dict = helpful_functions.return_full_wells(
            image_dict, expected_cycles, metadata['one_or_many_files'],
            files_per_well=num_series)
    metadata['wells_with_all_cycles'] = parsed_image_dict
    helpful_functions.write_metadata_file(
        s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name)

    # Pull the file names we care about, and make the CSV
    print('Making the CSVs')
    platelist = image_dict.keys()
    for eachplate in platelist:
        platedict = parsed_image_dict[eachplate]
        well_list = platedict.keys()
        bucket_folder = ('/home/ubuntu/bucket/' + image_prefix + batch +
                         '/images/' + eachplate)
        per_plate_csv = create_CSVs.create_CSV_pipeline5(
            eachplate, num_series, expected_cycles, bucket_folder, platedict,
            metadata['one_or_many_files'], metadata["fast_or_slow_mode"])
        csv_on_bucket_name = (prefix + 'load_data_csv/' + batch + '/' +
                              eachplate + '/load_data_pipeline5.csv')
        with open(per_plate_csv, 'rb') as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)

    # Now it's time to run DCP
    # Replacement for 'fab setup'
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)
    # run_DCP.grab_batch_config(bucket,prefix,batch,step)

    # Make a batch
    create_batch_jobs.create_batch_jobs_5(image_prefix, batch, pipeline_name,
                                          platelist, expected_cycles, app_name)

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name,
                        len(platelist) * expected_cycles)

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print('Go run the monitor now')
def lambda_handler(event, context):
    bucket = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    prefix, batchAndPipe = key.split("pipelines/")
    image_prefix = prefix.split("workspace")[0]
    batch = batchAndPipe.split("1_")[0][:-1]

    # Get the metadata file
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket, metadata_file_name, metadata_on_bucket_name
    )

    # Standard vs. SABER configs
    if "Channeldict" not in list(metadata.keys()):
        print("Update your metadata.json to include Channeldict")
        return "Update your metadata.json to include Channeldict"
    Channeldict = ast.literal_eval(metadata["Channeldict"])
    if len(Channeldict.keys()) == 1:
        SABER = False
        print("Not a SABER experiment")
    if len(Channeldict.keys()) > 1:
        SABER = True
        print("SABER experiment")

    # Calculate number of images from rows and columns in metadata
    num_series = int(metadata["painting_rows"]) * int(metadata["painting_columns"])
    # Overwrite rows x columns number series if images per well set in metadata
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Get the list of images in this experiment
    if not SABER:
        parse_name_filter = "20X_CP_"
    if SABER:
        parse_name_filter = ""
    image_list_prefix = image_prefix + batch + "/images/"
    image_list = helpful_functions.paginate_a_folder(s3, bucket, image_list_prefix)
    image_dict = helpful_functions.parse_image_names(
        image_list, filter_in=parse_name_filter, filter_out="copy"
    )
    metadata["painting_file_data"] = image_dict
    helpful_functions.write_metadata_file(
        s3, bucket, metadata, metadata_file_name, metadata_on_bucket_name
    )

    # How many files/well indicates the well has all images present
    if metadata["one_or_many_files"] == "one":
        full_well_files = 1
    else:
        full_well_files = num_series

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        Channelrounds = list(Channeldict.keys())
        # Only keep full wells
        print(f"{full_well_files} expected files per well and round for {eachplate}")
        incomplete_wells = []
        for eachwell in well_list:
            for eachround in Channelrounds:
                per_well = platedict[eachwell][eachround]
                if len(per_well) != full_well_files:
                    incomplete_wells.append(eachwell)
                    print(
                        f"{eachwell} {eachround} doesn't have full well files. {len(per_well)} files found."
                    )
        if incomplete_wells:
            # Dedupe so a well short in more than one round is only deleted once
            for well in set(incomplete_wells):
                del platedict[well]
        bucket_folder = (
            "/home/ubuntu/bucket/" + image_prefix + batch + "/images/" + eachplate + "/"
        )
        illum_folder = (
            "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + eachplate
        )
        per_plate_csv, per_plate_csv_2 = create_CSVs.create_CSV_pipeline1(
            eachplate,
            num_series,
            bucket_folder,
            illum_folder,
            platedict,
            metadata["one_or_many_files"],
            metadata["Channeldict"],
        )
        csv_on_bucket_name = (
            prefix
            + "load_data_csv/"
            + batch
            + "/"
            + eachplate
            + "/load_data_pipeline1.csv"
        )
        csv_on_bucket_name_2 = (
            prefix
            + "load_data_csv/"
            + batch
            + "/"
            + eachplate
            + "/load_data_pipeline2.csv"
        )
        with open(per_plate_csv, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name)
        with open(per_plate_csv_2, "rb") as a:
            s3.put_object(Body=a, Bucket=bucket, Key=csv_on_bucket_name_2)

    # Now it's time to run DCP
    app_name = run_DCP.run_setup(bucket, prefix, batch, step)

    # Make a batch
    if not SABER:
        pipeline_name = "1_CP_Illum.cppipe"
    if SABER:
        pipeline_name = "1_SABER_CP_Illum.cppipe"
    create_batch_jobs.create_batch_jobs_1(
        image_prefix, batch, pipeline_name, platelist, app_name
    )

    # Start a cluster
    run_DCP.run_cluster(bucket, prefix, batch, step, fleet_file_name, len(platelist))

    # Run the monitor
    run_DCP.run_monitor(bucket, prefix, batch, step)
    print("Go run the monitor now")
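# ---------------------------------------------------------------------------
# Hedged sketch: how the handler above distinguishes a standard experiment from
# a SABER one using the Channeldict metadata string. The dictionary contents
# below are invented for illustration; only the "one acquisition round =
# standard, multiple rounds = SABER" rule comes from the code above.
# ---------------------------------------------------------------------------
def _demo_saber_detection(channeldict_str="{'20X_CP_': {'DAPI': 0, 'Phalloidin': 1}}"):
    import ast
    # metadata.json stores Channeldict as a string, so it is parsed with literal_eval
    Channeldict = ast.literal_eval(channeldict_str)
    SABER = len(Channeldict.keys()) > 1  # more than one round means SABER
    return SABER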
def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']

    if 'csv' in key:
        plate = key.split('/')[-2].split('-')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]
    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]
    prefix = os.path.join(image_prefix, 'workspace/')
    print(batch, prefix)

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch, 'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)
    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = range(0, num_series, range_skip)
    expected_files_per_well = num_series * 6
    platelist = image_dict.keys()
    plate_and_well_list = metadata['painting_plate_and_well_list']

    # First let's check if 3A is done
    filter_prefix = image_prefix + batch + '/images_segmentation/segment_troubleshoot'
    expected_len = len(plate_and_well_list) * expected_files_per_well
    print('Checking if all files are present')
    done = helpful_functions.check_if_run_done(
        s3, bucket_name, filter_prefix, expected_len, current_app_name,
        prev_step_app_name, sqs, duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return 'Still work ongoing'
    else:
        print("Checking CSVs for what the upper threshold should be")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3, bucket_name,
            os.path.join(image_prefix, batch, 'images_segmentation/troubleshoot'))
        image_csv_list = [x for x in image_csv_list if 'Image.csv' in x]
        image_df = helpful_functions.concat_some_csvs(
            s3, bucket_name, image_csv_list, 'Image.csv')
        threshes = image_df['Threshold_FinalThreshold_Cells']
        percentile = numpy.percentile(threshes, 90)
        print("In ", len(image_csv_list) * num_series,
              "images, the 90th percentile was", percentile)
        pipeline_on_bucket_name = os.path.join(prefix, 'pipelines', batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join('/tmp', pipeline_name)
        local_temp_pipeline_name = os.path.join(
            '/tmp', pipeline_name.split('.')[0] + '_edited.cppipe')
        with open(local_pipeline_name, 'wb') as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name, percentile)
        with open(local_temp_pipeline_name, 'rb') as pipeline:
            s3.put_object(Body=pipeline, Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print('Edited pipeline file')

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict.keys()
            bucket_folder = ('/home/ubuntu/bucket/' + image_prefix + batch +
                             '/images_corrected/painting')
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = (prefix + 'load_data_csv/' + batch + '/' +
                                  eachplate + '/load_data_pipeline3B.csv')
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

        # Now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)
        print('app_name is', app_name)

        # Make the jobs
        create_batch_jobs.create_batch_jobs_3B(image_prefix, batch, pipeline_name,
                                               plate_and_well_list, out_range,
                                               app_name)

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, config_step,
                            fleet_file_name,
                            len(plate_and_well_list) * len(out_range))

        # Create the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return 'Cluster started'