def _group_channels(url_list, order):
    """
    Group image URLs so that every group holds the images that share
    the same plate name, plate number, well and site.

    Arguments:
    -----------
    url_list : list
        image URLs; it is iterated several times, so it has to be a
        re-iterable collection rather than a one-shot iterator
    order : boolean
        True sorts the URLs within each group by the channel number
        parsed from the URL, False keeps them in the order pandas
        returns them for the group

    Returns:
    --------
    list of lists of image URLs, one inner list per group

    Raises:
    -------
    ValueError if `order` is neither True nor False
    """
    filenames = [parse.img_filename(url) for url in url_list]
    url_table = pd.DataFrame(list(url_list), columns=["img_url"])
    url_table["plate_name"] = [parse.plate_name(url) for url in url_list]
    url_table["plate_num"] = [parse.plate_num(url) for url in url_list]
    # well and site are parsed from the bare filename, not the full URL
    url_table["well"] = [parse.img_well(name) for name in filenames]
    url_table["site"] = [parse.img_site(name) for name in filenames]
    per_site = url_table.groupby(["plate_name", "plate_num", "well", "site"])
    # identity checks on purpose: only the real booleans are accepted
    if order is not True and order is not False:
        raise ValueError("order needs to be a boolean")
    site_urls = (list(members["img_url"]) for _, members in per_site)
    if order is False:
        return list(site_urls)
    # sorted() is stable, so URLs with equal channel numbers keep their
    # relative order within the group
    return [sorted(urls, key=parse.img_channel) for urls in site_urls]
def keep_channels(img_list, channels):
    """
    given a list of image paths, this will keep specified channel numbers,
    and remove all others.

    Parameters:
    -----------
    img_list : list
        list of image URLs, either all full paths or all bare filenames;
        only the first entry is inspected to decide which of the two it is
    channels : list of integers
        list of channel numbers to keep

    Returns:
    --------
    tuple of image URLs, taken unchanged from `img_list` and in their
    original order. An empty tuple is returned when `img_list` is empty
    or when none of the images belongs to one of `channels`.
    """
    # nothing to filter, and img_list[0] below would raise IndexError
    if len(img_list) == 0:
        return ()
    # find if img_urls are full paths or just filenames
    if utils.is_full_path(img_list[0]):
        just_file_path = [parse.img_filename(i) for i in img_list]
    else:
        just_file_path = img_list
    # the channel is parsed from the filename, but the entry that is kept
    # is the original one from img_list, *not* the stripped filename.
    # Building the tuple directly (instead of unzipping with zip(*...))
    # also copes with the case where no image matches.
    return tuple(
        img_url
        for img_url, file_path in zip(img_list, just_file_path)
        if parse.img_channel(file_path) in channels
    )
def test_ImageDict_keep_channels():
    """ImageDict.keep_channels should only return the requested channels"""
    wanted = [1, 2, 3]
    img_dict = image_prep.ImageDict()
    kept_urls = img_dict.keep_channels(IMG_URLS, wanted)
    # recover the channel number of every returned image from its filename
    kept_names = [parse.img_filename(url) for url in kept_urls]
    kept_channels = [parse.img_channel(name) for name in kept_names]
    assert all(channel in wanted for channel in kept_channels)
def test_ImageDict_remove_channels():
    """ImageDict.remove_channels should drop every requested channel"""
    unwanted = [4, 5]
    img_dict = image_prep.ImageDict()
    remaining_urls = img_dict.remove_channels(IMG_URLS, unwanted)
    # recover the channel number of every remaining image from its filename
    remaining_names = [parse.img_filename(url) for url in remaining_urls]
    remaining_channels = [parse.img_channel(name) for name in remaining_names]
    assert not any(channel in unwanted for channel in remaining_channels)
def _group_images(df_img):
    """
    group image paths by well and site, sorted by channel within a group

    `df_img` is grouped on its "Metadata_well" and "Metadata_site"
    columns; for every group the entries of "img_paths" are returned
    as a list ordered by the channel number parsed from each path.
    """
    per_site = df_img.groupby(["Metadata_well", "Metadata_site"])
    # sorted() is stable, so paths with equal channel numbers keep
    # the order they had in the dataframe
    return [
        sorted(list(members["img_paths"]), key=_parse.img_channel)
        for _, members in per_site
    ]
def test_ImageDict_sort_channels():
    """group_image_channels should only sort channels when order=True"""
    # order=False: IMG_URLS is already sorted by channel, so feed it in
    # reversed to have something that would be visibly re-ordered
    unsorted_dict = image_prep.ImageDict()
    unsorted_dict.add_class("foo", IMG_URLS[::-1])
    unsorted_dict.group_image_channels(order=False)
    first_unsorted_group = unsorted_dict.parent_dict["foo"][0]
    unsorted_channels = [
        parse.img_channel(url) for url in first_unsorted_group
    ]
    assert unsorted_channels != sorted(unsorted_channels)
    # order=True: a fresh ImageDict is used because adding a new class to
    # already grouped data triggers a warning
    sorted_dict = image_prep.ImageDict()
    sorted_dict.add_class("bar", IMG_URLS)
    sorted_dict.group_image_channels(order=True)
    first_sorted_group = sorted_dict.parent_dict["bar"][0]
    sorted_channels = [parse.img_channel(url) for url in first_sorted_group]
    assert sorted_channels == sorted(sorted_channels)
def create_long_loaddata(img_list): """ create a dataframe of image paths with metadata columns """ just_filenames = [_parse.img_filename(i) for i in img_list] df_img = _pd.DataFrame({ "URL": just_filenames, "path": [_parse.path(i) for i in img_list], "Metadata_platename": [_parse.plate_name(i) for i in img_list], "Metadata_well": [_parse.img_well(i) for i in just_filenames], "Metadata_site": [_parse.img_site(i) for i in just_filenames], "Metadata_channel": [_parse.img_channel(i) for i in just_filenames], "Metadata_platenum": [_parse.plate_num(i) for i in img_list] }) return df_img
def test_img_channel():
    """img_channel should return 1 for the filenames of both example paths"""
    example_names = [
        parse.img_filename(path) for path in (EXAMPLE_PATH, NEW_EXAMPLE_PATH)
    ]
    for name in example_names:
        assert parse.img_channel(name) == 1