def _group_channels(url_list, order):
    """
    Collect image URLs into groups that share the same plate name,
    plate number, well and site.

    Parameters:
    -----------
    url_list : list
        image URLs to group
    order : boolean
        True sorts every group by channel number; False leaves each
        group in the order the pandas groupby yields it.

    Returns:
    --------
    list of lists of image URLs, one inner list per group

    Raises:
    -------
    ValueError
        if `order` is neither True nor False
    """
    filenames = [parse.img_filename(url) for url in url_list]
    table = pd.DataFrame(list(url_list), columns=["img_url"])
    # plate metadata is parsed from the URL as given ...
    table["plate_name"] = [parse.plate_name(url) for url in url_list]
    table["plate_num"] = [parse.plate_num(url) for url in url_list]
    # ... whereas well and site are parsed from the bare filename
    table["well"] = [parse.img_well(name) for name in filenames]
    table["site"] = [parse.img_site(name) for name in filenames]
    by_position = table.groupby(["plate_name", "plate_num", "well", "site"])

    if order is True:
        channel_sorted = []
        for _key, frame in by_position:
            members = list(frame["img_url"])
            channels = [parse.img_channel(member) for member in members]
            # pair each path with its channel number, then sort on the
            # channel number only (stable for equal channels)
            ranked = sorted(zip(members, channels), key=lambda pair: pair[1])
            channel_sorted.append([pair[0] for pair in ranked])
        return channel_sorted

    if order is False:
        return [list(frame["img_url"]) for _key, frame in by_position]

    raise ValueError("order needs to be a boolean")
def keep_channels(img_list, channels):
    """
    Keep only the images whose channel number is in `channels`.

    Parameters:
    -----------
    img_list : list
        list of image URLs (full paths or bare filenames)
    channels : list of integers
        list of channel numbers to keep

    Returns:
    --------
    tuple of image URLs, in their original order. An empty tuple is
    returned when `img_list` is empty or when no image matches.
    """
    img_list = list(img_list)
    if not img_list:
        # nothing to inspect; indexing img_list[0] below would fail
        return ()
    # the first entry decides whether the URLs are full paths or filenames
    if utils.is_full_path(img_list[0]):
        filenames = [parse.img_filename(i) for i in img_list]
    else:
        filenames = img_list
    # channel numbers come from the filenames, but the *original* entries
    # of img_list are what gets returned
    return tuple(
        url
        for url, name in zip(img_list, filenames)
        if parse.img_channel(name) in channels
    )
def test_ImageDict_keep_channels():
    """Every URL returned by ImageDict.keep_channels has a wanted channel."""
    wanted = [1, 2, 3]
    image_dict = image_prep.ImageDict()
    kept_urls = image_dict.keep_channels(IMG_URLS, wanted)
    # recover the channel number of each returned URL via its filename
    found_channels = [
        parse.img_channel(parse.img_filename(url)) for url in kept_urls
    ]
    for found in found_channels:
        assert found in wanted
def test_ImageDict_remove_channels():
    """No URL returned by ImageDict.remove_channels has a removed channel."""
    unwanted = [4, 5]
    image_dict = image_prep.ImageDict()
    remaining_urls = image_dict.remove_channels(IMG_URLS, unwanted)
    # recover the channel number of each returned URL via its filename
    found_channels = [
        parse.img_channel(parse.img_filename(url)) for url in remaining_urls
    ]
    for found in found_channels:
        assert found not in unwanted
def _well_site_table(img_list):
    """
    Build a pandas dataframe holding each image path together with the
    well and site parsed from its filename.

    Columns: "img_paths", "Metadata_well", "Metadata_site".
    """
    filenames = [_parse.img_filename(path) for path in img_list]
    wells = [_parse.img_well(name) for name in filenames]
    sites = [_parse.img_site(name) for name in filenames]
    return _pd.DataFrame({
        "img_paths": img_list,
        "Metadata_well": wells,
        "Metadata_site": sites,
    })
def get_wells(img_list, wells_to_get, plate=None):
    """
    Return the images from `img_list` that belong to the requested well(s).

    Parameters:
    -----------
    img_list : list
        list of image URLs
    wells_to_get : string or list of strings
        which well(s) to select
    plate : string or list of strings (default = None)
        if given, only images from these plate(s) are returned;
        if None, plate labels are ignored.

    Returns:
    --------
    list of image URLs.
    With plate=None the result is ordered by the wells in `wells_to_get`
    and, within one well, by position in `img_list`.
    With plate given the result is ordered by plate group (as yielded by
    the pandas groupby) and, within one plate, by position in `img_list`.
    """
    # normalise to lists so that a single well and many wells share one path
    if isinstance(wells_to_get, str):
        wells_to_get = [wells_to_get]
    else:
        wells_to_get = list(wells_to_get)
    img_list = list(img_list)

    if plate is None:
        # ignore plate labels, get all matching wells.
        # Index the paths by well once: the previous nested loop iterated a
        # zip() object repeatedly, which is exhausted after the first well
        # on Python 3, so later wells never matched.
        # NOTE(review): the well is parsed from the entry as given here,
        # while the per-plate branch parses it from parse.img_filename();
        # confirm parse.img_well accepts full paths.
        paths_by_well = {}
        for path in img_list:
            paths_by_well.setdefault(parse.img_well(path), []).append(path)
        wanted_images = []
        for well in wells_to_get:
            wanted_images.extend(paths_by_well.get(well, []))
        return wanted_images

    # get wells per specified plate(s)
    if isinstance(plate, str):
        plate = [plate]
    filenames = [parse.img_filename(i) for i in img_list]
    tmp_df = pd.DataFrame(img_list, columns=["img_url"])
    tmp_df["plate_name"] = [parse.plate_name(i) for i in img_list]
    tmp_df["well"] = [parse.img_well(i) for i in filenames]
    wanted_images = []
    # group by a scalar key so group names are plain plate-name strings;
    # a one-element list key yields 1-tuples on newer pandas, which would
    # never compare equal to the entries of `plate`
    for name, group in tmp_df.groupby("plate_name"):
        if name in plate:
            # get only wells that match
            matching = group[group["well"].isin(wells_to_get)]
            wanted_images.extend(matching["img_url"].tolist())
    return wanted_images
def create_long_loaddata(img_list):
    """
    Build a dataframe with one row per image, holding its filename, its
    path and the metadata parsed from it.

    Columns: "URL", "path", "Metadata_platename", "Metadata_well",
    "Metadata_site", "Metadata_channel", "Metadata_platenum".
    """
    filenames = [_parse.img_filename(entry) for entry in img_list]
    # path, plate name and plate number are parsed from the entry as given
    paths = [_parse.path(entry) for entry in img_list]
    plate_names = [_parse.plate_name(entry) for entry in img_list]
    # well, site and channel are parsed from the bare filename
    wells = [_parse.img_well(name) for name in filenames]
    sites = [_parse.img_site(name) for name in filenames]
    channels = [_parse.img_channel(name) for name in filenames]
    plate_nums = [_parse.plate_num(entry) for entry in img_list]
    columns = {
        "URL": filenames,
        "path": paths,
        "Metadata_platename": plate_names,
        "Metadata_well": wells,
        "Metadata_site": sites,
        "Metadata_channel": channels,
        "Metadata_platenum": plate_nums,
    }
    return _pd.DataFrame(columns)
def test_img_channel():
    """The channel parsed from either example filename is 1."""
    for example in (EXAMPLE_PATH, NEW_EXAMPLE_PATH):
        name = parse.img_filename(example)
        assert parse.img_channel(name) == 1
def test_img_site():
    """The site parsed from either example filename is 1."""
    for example in (EXAMPLE_PATH, NEW_EXAMPLE_PATH):
        name = parse.img_filename(example)
        assert parse.img_site(name) == 1
def test_img_well():
    """The well parsed from either example filename is "B02"."""
    for example in (EXAMPLE_PATH, NEW_EXAMPLE_PATH):
        name = parse.img_filename(example)
        assert parse.img_well(name) == "B02"
def test_img_filename():
    """Both example paths resolve to the same bare filename."""
    wanted_name = (
        "val screen_B02_s1_w1_thumb62D4A363-7C7E-40D0-8A9E-55EC6681574D.tif"
    )
    for example in (EXAMPLE_PATH, NEW_EXAMPLE_PATH):
        assert parse.img_filename(example) == wanted_name