Example #1
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2].split("-")[0]
    batch = key.split("/")[-5]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    print(plate, batch, image_prefix, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["barcoding_file_data"]
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    # number of sites * 4 barcoding channels * number of cycles; doesn't include the 1 DAPI/site
    expected_files_per_well = int(num_series) * 4 * int(
        metadata["barcoding_cycles"])
    plate_and_well_list = metadata["barcoding_plate_and_well_list"]

    if "barcoding_xoffset_tiles" in list(metadata.keys()):
        barcoding_xoffset_tiles = metadata["barcoding_xoffset_tiles"]
        barcoding_yoffset_tiles = metadata["barcoding_yoffset_tiles"]
    else:
        barcoding_xoffset_tiles = barcoding_yoffset_tiles = "0"

    if "compress" in list(metadata.keys()):
        compress = metadata["compress"]
    else:
        compress = "True"

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_corrected/barcoding"
    # Because this step is batched per site (not well), we don't need to anticipate partial loading of jobs
    expected_len = (
        int(len(plate_and_well_list)) * int(expected_files_per_well) + 5)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name,
                                     prefix,
                                     batch,
                                     step,
                                     cellprofiler=False)

        # make the jobs
        create_batch_jobs.create_batch_jobs_8(
            bucket_name,
            image_prefix,
            batch,
            metadata,
            plate_and_well_list,
            app_name,
            tileperside=tileperside,
            final_tile_size=final_tile_size,
            xoffset_tiles=barcoding_xoffset_tiles,
            yoffset_tiles=barcoding_yoffset_tiles,
            compress=compress,
        )

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(plate_and_well_list))

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #2
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['wells_with_all_cycles']
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata['barcoding_cycles'])
    platelist = image_dict.keys()

    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == 'fast':
        if 'fast' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_fast.cppipe'
    else:
        if 'slow' not in pipe_name:
            pipe_name = pipe_name[:-7] + '_slow.cppipe'
    print(pipe_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/illum'
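    #5 illum images expected per plate per cycle (likely 4 barcoding channels + DAPI)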
    expected_len = int(metadata['barcoding_cycles']) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(s3,
                                               bucket_name,
                                               filter_prefix,
                                               expected_len,
                                               current_app_name,
                                               prev_step_app_name,
                                               sqs,
                                               duplicate_queue_name,
                                               filter_in='Cycle')

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        #First thing first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict['1'].keys()
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata['barcoding_plate_and_well_list'] = plate_and_well_list
        helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                              metadata_file_name,
                                              metadata_on_bucket_name)
        #Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + eachplate
            illum_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/illum/' + eachplate
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate, num_series, expected_cycles, bucket_folder,
                illum_folder, platedict, metadata['one_or_many_files'],
                metadata["fast_or_slow_mode"])
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline6.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_6(image_prefix, batch, pipe_name,
                                              plate_and_well_list, app_name,
                                              metadata['one_or_many_files'],
                                              num_series)

        #Start a cluster
        if metadata['one_or_many_files'] == 'one':
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            njobs)

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
Example #3
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    # Calculate number of images from rows and columns in metadata
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    # Override the rows x columns count if images per well is set in the metadata
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Standard vs. SABER configs
    if "Channeldict" not in list(metadata.keys()):
        print("Update your metadata.json to include Channeldict")
        return "Update your metadata.json to include Channeldict"
    Channeldict = ast.literal_eval(metadata["Channeldict"])
    if len(Channeldict.keys()) == 1:
        SABER = False
        print("Not a SABER experiment")
    if len(Channeldict.keys()) > 1:
        SABER = True
        print("SABER experiment")

    platelist = list(image_dict.keys())
    platedict = image_dict[plate]
    well_list = list(platedict.keys())

    # Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = (int(metadata["painting_channels"]) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_out="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"

    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        if not SABER:
            pipeline_name = "2_CP_Apply_Illum.cppipe"
        if SABER:
            pipeline_name = "2_SABER_CP_Apply_Illum.cppipe"
        # make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(platelist) * len(well_list),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #4
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    if ".cppipe" not in key:
        plate = key.split("/")[-2].split("_")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]
        print(plate)
    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]
    prefix = os.path.join(image_prefix, "workspace/")

    print(
        f"Batch is {batch}\n Image prefix is {image_prefix}\n Prefix is {prefix}"
    )

    # Check that the barcodes.csv is present
    barcodepath = os.path.join(prefix, "metadata", batch)
    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=barcodepath)
    filelist = [obj["Key"] for obj in response.get("Contents", [])]
    if not any(".csv" in f for f in filelist):
        print(f"No Barcodes.csv in {barcodepath}")
        return "Barcodes.csv is missing"

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print(("Loading", metadata_on_bucket_name))
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata["barcoding_plate_and_well_list"]
    image_dict = metadata["wells_with_all_cycles"]
    expected_cycles = metadata["barcoding_cycles"]
    platelist = list(image_dict.keys())
    num_series = int(metadata["barcoding_rows"]) * int(
        metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
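    # Per site: 4 channel images per barcoding cycle plus 1 DAPI image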
    expected_files_per_well = (num_series * (
        (int(metadata["barcoding_cycles"]) * 4) + 1)) + 3
    num_sites = len(plate_and_well_list) * num_series

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_aligned/barcoding"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict["1"].keys())
            bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                             "/images_aligned/barcoding")
            per_plate_csv = create_CSVs.create_CSV_pipeline7(
                eachplate, num_series, expected_cycles, bucket_folder,
                well_list)
            csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                                  eachplate + "/load_data_pipeline7.csv")
            print(("Created", csv_on_bucket_name))
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_7(
            image_prefix,
            batch,
            pipeline_name,
            plate_and_well_list,
            list(range(num_series)),
            app_name,
        )

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            num_sites)

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #5
def lambda_handler(event, context):
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    if 'csv' in key:
        plate = key.split('/')[-2].split('-')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]

    prefix = os.path.join(image_prefix, 'workspace/')
    print(batch, prefix)

    # Get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
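    # Keep only every range_skip-th site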
    out_range = range(0, num_series, range_skip)
    expected_files_per_well = (num_series * 6)
    platelist = image_dict.keys()
    plate_and_well_list = metadata['painting_plate_and_well_list']

    # First let's check if 3A is done
    filter_prefix = image_prefix + batch + '/images_segmentation/segment_troubleshoot'
    expected_len = (len(plate_and_well_list) * expected_files_per_well)

    print('Checking if all files are present')
    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        print("Checking CSVs for what the upper threshold should be")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3, bucket_name,
            os.path.join(image_prefix, batch,
                         'images_segmentation/troubleshoot'))
        image_csv_list = [x for x in image_csv_list if 'Image.csv' in x]
        image_df = helpful_functions.concat_some_csvs(s3, bucket_name,
                                                      image_csv_list,
                                                      'Image.csv')
        threshes = image_df['Threshold_FinalThreshold_Cells']
        percentile = numpy.percentile(threshes, 90)
        print("In ",
              len(image_csv_list) * num_series,
              "images, the 90th percentile was", percentile)

        pipeline_on_bucket_name = os.path.join(prefix, 'pipelines', batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join('/tmp', pipeline_name)
        local_temp_pipeline_name = os.path.join(
            '/tmp',
            pipeline_name.split('.')[0] + '_edited.cppipe')
        with open(local_pipeline_name, 'wb') as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name,
                          percentile)
        with open(local_temp_pipeline_name, 'rb') as pipeline:
            s3.put_object(Body=pipeline,
                          Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print('Edited pipeline file')

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict.keys()
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images_corrected/painting'
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline3B.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # Now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)
        print('app_name is', app_name)

        # Make the jobs
        create_batch_jobs.create_batch_jobs_3B(image_prefix, batch,
                                               pipeline_name,
                                               plate_and_well_list, out_range,
                                               app_name)

        # Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, config_step,
                            fleet_file_name,
                            len(plate_and_well_list) * len(out_range))

        # Create the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
Example #6
def lambda_handler(event, context):
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    if "csv" in key:
        plate = key.split("/")[-2].split("-")[0]
        batch = key.split("/")[-5]
        image_prefix = key.split(batch)[0]

    else:
        batch = key.split("/")[-2]
        image_prefix = key.split("workspace")[0]

    prefix = os.path.join(image_prefix, "workspace/")
    print(batch, prefix)

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
    out_range = list(range(0, num_series, range_skip))
    expected_files_per_well = (num_series *
                               int(metadata["painting_channels"])) + 6
    platelist = list(image_dict.keys())
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = list(platedict.keys())
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata["painting_plate_and_well_list"] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/images_corrected/painting"
    # Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    print("Checking if all files are present")
    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        print("Checking CSVs for thresholds")
        image_csv_list = helpful_functions.paginate_a_folder(
            s3,
            bucket_name,
            os.path.join(image_prefix, batch, "images_corrected/painting"),
        )
        image_csv_list = [x for x in image_csv_list if "Image.csv" in x]
        image_df = helpful_functions.concat_some_csvs(s3, bucket_name,
                                                      image_csv_list,
                                                      "Image.csv")
        threshes = image_df["Threshold_FinalThreshold_Cells"]
        calc_upper_percentile = numpy.percentile(threshes, upper_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {upper_percentile} percentile was",
            calc_upper_percentile,
        )
        calc_lower_percentile = numpy.percentile(threshes, lower_percentile)
        print(
            "In ",
            len(image_csv_list) * num_series,
            f"images, the {lower_percentile} percentile was",
            calc_lower_percentile,
        )

        pipeline_on_bucket_name = os.path.join(prefix, "pipelines", batch,
                                               pipeline_name)
        local_pipeline_name = os.path.join("/tmp", pipeline_name)
        local_temp_pipeline_name = os.path.join(
            "/tmp",
            pipeline_name.split(".")[0] + "_edited.cppipe")
        with open(local_pipeline_name, "wb") as f:
            s3.download_fileobj(bucket_name, pipeline_on_bucket_name, f)
        edit_id_secondary(local_pipeline_name, local_temp_pipeline_name,
                          calc_lower_percentile, calc_upper_percentile)
        with open(local_temp_pipeline_name, "rb") as pipeline:
            s3.put_object(Body=pipeline,
                          Bucket=bucket_name,
                          Key=pipeline_on_bucket_name)
        print("Edited pipeline file")

        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict.keys())
            bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                             "/images_corrected/painting")
            per_plate_csv = create_CSVs.create_CSV_pipeline3(
                eachplate, num_series, bucket_folder, well_list, range_skip)
            csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" +
                                  eachplate + "/load_data_pipeline3.csv")
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_3(image_prefix, batch,
                                              pipeline_name,
                                              plate_and_well_list, out_range,
                                              app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(plate_and_well_list) * len(out_range),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #7
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    #Pull the file names we care about, and make the CSV
    platelist = image_dict.keys()

    plate = key.split('/')[-2]
    platedict = image_dict[plate]
    well_list = list(platedict.keys())
    paint_cycle_name = list(platedict[well_list[0]].keys())[0]
    per_well_im_list = []
    if metadata['one_or_many_files'] == 'one':
        full_well_files = 1
    else:
        full_well_files = num_series
    full_well_list = []
    for eachwell in well_list:
        per_well = platedict[eachwell][paint_cycle_name]
        if len(per_well) == full_well_files:  #only keep full wells
            per_well_im_list.append(per_well)
            full_well_list.append(eachwell)
            print('Added well', eachwell)
    bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images/' + plate + '/' + paint_cycle_name
    illum_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/illum/' + plate
    per_plate_csv = create_CSVs.create_CSV_pipeline2(
        plate, num_series, bucket_folder, illum_folder, per_well_im_list,
        full_well_list, metadata['one_or_many_files'])
    csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + plate + '/load_data_pipeline2.csv'
    print(csv_on_bucket_name)
    with open(per_plate_csv, 'rb') as a:
        s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    #Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/illum'
    expected_len = (int(metadata['painting_channels']) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(s3,
                                               bucket_name,
                                               filter_prefix,
                                               expected_len,
                                               current_app_name,
                                               prev_step_app_name,
                                               sqs,
                                               duplicate_queue_name,
                                               filter_out='Cycle')

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')

    else:
        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(platelist) * len(well_list))

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
Example #8
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch,
                                           "metadata.json")
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata["painting_file_data"]
    num_series = int(metadata["painting_rows"]) * int(
        metadata["painting_columns"])
    if "painting_imperwell" in list(metadata.keys()):
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])

    # Pull the file names we care about, and make the CSV
    platelist = list(image_dict.keys())
    plate = key.split("/")[-2]
    platedict = image_dict[plate]
    well_list = list(platedict.keys())
    paint_cycle_name = list(platedict[well_list[0]].keys())[0]
    per_well_im_list = []
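    # A complete well is a single file in "one" mode, otherwise num_series files (one per site)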
    if metadata["one_or_many_files"] == "one":
        full_well_files = 1
    else:
        full_well_files = num_series
    full_well_list = []
    for eachwell in well_list:
        per_well = platedict[eachwell][paint_cycle_name]
        if len(per_well) == full_well_files:  # only keep full wells
            per_well_im_list.append(per_well)
            full_well_list.append(eachwell)
            print("Added well", eachwell)
        else:
            print(f"Discarded well {eachwell}. Missing images.")
    bucket_folder = ("/home/ubuntu/bucket/" + image_prefix + batch +
                     "/images/" + plate + "/" + paint_cycle_name)
    illum_folder = "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + plate
    per_plate_csv = create_CSVs.create_CSV_pipeline2(
        plate,
        num_series,
        bucket_folder,
        illum_folder,
        per_well_im_list,
        full_well_list,
        metadata["one_or_many_files"],
    )
    csv_on_bucket_name = (prefix + "load_data_csv/" + batch + "/" + plate +
                          "/load_data_pipeline2.csv")
    print(csv_on_bucket_name)
    with open(per_plate_csv, "rb") as a:
        s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

    # Now let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = (int(metadata["painting_channels"]) + 1) * len(platelist)

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_out="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"

    else:
        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_2(image_prefix, batch,
                                              pipeline_name, platelist,
                                              well_list, app_name)

        # Start a cluster
        run_DCP.run_cluster(
            bucket_name,
            prefix,
            batch,
            step,
            fleet_file_name,
            len(platelist) * len(well_list),
        )

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #9
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event["Records"][0]["s3"]["bucket"]["name"]
    key = event["Records"][0]["s3"]["object"]["key"]
    keys = [x["s3"]["object"]["key"] for x in event["Records"]]
    plate = key.split("/")[-2]
    batch = key.split("/")[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, "workspace/")

    # get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, "metadata", batch, "metadata.json")
    print("Loading", metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name
    )

    image_dict = metadata["wells_with_all_cycles"]
    num_series = int(metadata["barcoding_rows"]) * int(metadata["barcoding_columns"])
    if "barcoding_imperwell" in list(metadata.keys()):
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_cycles = int(metadata["barcoding_cycles"])
    platelist = list(image_dict.keys())

    # Default pipeline is slow. If images acquired in fast mode, pulls alternate pipeline.
    pipe_name = pipeline_name
    if metadata["fast_or_slow_mode"] == "fast":
        if "fast" not in pipe_name:
            pipe_name = pipe_name[:-7] + "_fast.cppipe"
    print(f"Pipeline name is {pipe_name}")

    # First let's check if it seems like the whole thing is done or not
    sqs = boto3.client("sqs")

    filter_prefix = image_prefix + batch + "/illum"
    expected_len = int(metadata["barcoding_cycles"]) * len(platelist) * 5

    done = helpful_functions.check_if_run_done(
        s3,
        bucket_name,
        filter_prefix,
        expected_len,
        current_app_name,
        prev_step_app_name,
        sqs,
        duplicate_queue_name,
        filter_in="Cycle",
    )

    if not done:
        print("Still work ongoing")
        return "Still work ongoing"
    else:
        # First thing first, let's make an easier-to-use plate and well list and save it
        plate_and_well_list = []
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = list(platedict["1"].keys())
            for eachwell in well_list:
                plate_and_well_list.append((eachplate, eachwell))
        metadata["barcoding_plate_and_well_list"] = plate_and_well_list
        helpful_functions.write_metadata_file(
            s3, bucket_name, metadata, metadata_file_name, metadata_on_bucket_name
        )
        # Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            bucket_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/images/" + eachplate
            )
            illum_folder = (
                "/home/ubuntu/bucket/" + image_prefix + batch + "/illum/" + eachplate
            )
            per_plate_csv = create_CSVs.create_CSV_pipeline6(
                eachplate,
                num_series,
                expected_cycles,
                bucket_folder,
                illum_folder,
                platedict,
                metadata["one_or_many_files"],
                metadata["fast_or_slow_mode"],
            )
            csv_on_bucket_name = (
                prefix
                + "load_data_csv/"
                + batch
                + "/"
                + eachplate
                + "/load_data_pipeline6.csv"
            )
            print("Created", csv_on_bucket_name)
            with open(per_plate_csv, "rb") as a:
                s3.put_object(Body=a, Bucket=bucket_name, Key=csv_on_bucket_name)

        # now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        # make the jobs
        create_batch_jobs.create_batch_jobs_6(
            image_prefix,
            batch,
            pipe_name,
            plate_and_well_list,
            app_name,
            metadata["one_or_many_files"],
            num_series,
        )

        # Start a cluster
        if metadata["one_or_many_files"] == "one":
            njobs = len(plate_and_well_list) * 19
        else:
            njobs = len(plate_and_well_list) * num_series
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name, njobs)

        # Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print("Go run the monitor now")
        return "Cluster started"
Example #10
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    plate = key.split('/')[-2].split('-')[0]
    batch = key.split('/')[-4]
    image_prefix = key.split(batch)[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(plate, batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    image_dict = metadata['painting_file_data']
    num_series = int(metadata['painting_rows']) * int(
        metadata['painting_columns'])
    if "painting_imperwell" in metadata.keys():
        if metadata["painting_imperwell"] != "":
            if int(metadata["painting_imperwell"]) != 0:
                num_series = int(metadata["painting_imperwell"])
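    #equals len(range(0, num_series, range_skip)); presumably only every range_skip-th site was processed upstream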
    expected_files_per_well = np.ceil(float(num_series) / range_skip)
    platelist = image_dict.keys()
    plate_and_well_list = []
    for eachplate in platelist:
        platedict = image_dict[eachplate]
        well_list = platedict.keys()
        for eachwell in well_list:
            plate_and_well_list.append((eachplate, eachwell))
    metadata['painting_plate_and_well_list'] = plate_and_well_list
    helpful_functions.write_metadata_file(s3, bucket_name, metadata,
                                          metadata_file_name,
                                          metadata_on_bucket_name)

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_corrected/painting'
    #Because this step is batched per site (not well), we don't need to anticipate partial loading of jobs
    expected_len = (len(plate_and_well_list) *
                    expected_files_per_well) + (6 * (len(platelist)))

    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name,
                                     prefix,
                                     batch,
                                     step,
                                     cellprofiler=False)

        #make the jobs
        create_batch_jobs.create_batch_jobs_4(image_prefix,
                                              batch,
                                              metadata,
                                              plate_and_well_list,
                                              app_name,
                                              tileperside=tileperside,
                                              final_tile_size=final_tile_size)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            len(plate_and_well_list))

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')
Example #11
def lambda_handler(event, context):
    # Log the received event
    bucket_name = event['Records'][0]['s3']['bucket']['name']
    key = event['Records'][0]['s3']['object']['key']
    keys = [x['s3']['object']['key'] for x in event['Records']]
    if '.cppipe' not in key:
        plate = key.split('/')[-2].split('_')[0]
        batch = key.split('/')[-5]
        image_prefix = key.split(batch)[0]
        print(plate)
    else:
        batch = key.split('/')[-2]
        image_prefix = key.split('workspace')[0]
    prefix = os.path.join(image_prefix, 'workspace/')

    print(batch, image_prefix, prefix)

    #get the metadata file, so we can add stuff to it
    metadata_on_bucket_name = os.path.join(prefix, 'metadata', batch,
                                           'metadata.json')
    print('Loading', metadata_on_bucket_name)
    metadata = helpful_functions.download_and_read_metadata_file(
        s3, bucket_name, metadata_file_name, metadata_on_bucket_name)

    plate_and_well_list = metadata['barcoding_plate_and_well_list']
    image_dict = metadata['wells_with_all_cycles']
    expected_cycles = metadata['barcoding_cycles']
    platelist = image_dict.keys()
    num_series = int(metadata['barcoding_rows']) * int(
        metadata['barcoding_columns'])
    if "barcoding_imperwell" in metadata.keys():
        if metadata["barcoding_imperwell"] != "":
            if int(metadata["barcoding_imperwell"]) != 0:
                num_series = int(metadata["barcoding_imperwell"])
    expected_files_per_well = (num_series * (
        (int(metadata['barcoding_cycles']) * 4) + 1)) + 3
    num_sites = len(plate_and_well_list) * num_series

    #First let's check if it seems like the whole thing is done or not
    sqs = boto3.client('sqs')

    filter_prefix = image_prefix + batch + '/images_aligned/barcoding'
    #Expected length shows that all transfers (i.e. all wells) have at least started
    expected_len = (
        (len(plate_and_well_list) - 1) * expected_files_per_well) + 1

    done = helpful_functions.check_if_run_done(s3, bucket_name, filter_prefix,
                                               expected_len, current_app_name,
                                               prev_step_app_name, sqs,
                                               duplicate_queue_name)

    if not done:
        print('Still work ongoing')
        return ('Still work ongoing')
    else:
        #Pull the file names we care about, and make the CSV
        for eachplate in platelist:
            platedict = image_dict[eachplate]
            well_list = platedict['1'].keys()
            bucket_folder = '/home/ubuntu/bucket/' + image_prefix + batch + '/images_aligned/barcoding'
            per_plate_csv = create_CSVs.create_CSV_pipeline7(
                eachplate, num_series, expected_cycles, bucket_folder,
                well_list)
            csv_on_bucket_name = prefix + 'load_data_csv/' + batch + '/' + eachplate + '/load_data_pipeline7.csv'
            print('Created', csv_on_bucket_name)
            with open(per_plate_csv, 'rb') as a:
                s3.put_object(Body=a,
                              Bucket=bucket_name,
                              Key=csv_on_bucket_name)

        # first let's just try to run the monitor on the last step, in case we haven't yet
        helpful_functions.try_a_shutdown(s3, bucket_name, prefix, batch,
                                         prev_step_num, prev_step_app_name)

        #now let's do our stuff!
        app_name = run_DCP.run_setup(bucket_name, prefix, batch, step)

        #make the jobs
        create_batch_jobs.create_batch_jobs_7(image_prefix, batch,
                                              pipeline_name,
                                              plate_and_well_list,
                                              range(num_series), app_name)

        #Start a cluster
        run_DCP.run_cluster(bucket_name, prefix, batch, step, fleet_file_name,
                            num_sites)

        #Run the monitor
        run_DCP.run_monitor(bucket_name, prefix, batch, step)
        print('Go run the monitor now')
        return ('Cluster started')