def evaluate(object_folder):
    test_images_list = os.path.join(object_folder, 'test_images_list.csv')
    test_labels_list = os.path.join(object_folder, 'test_labels_list.csv')
    test_image_filenames = KScsv.read_csv(test_images_list)
    test_label_filenames = KScsv.read_csv(test_labels_list)

    all_prediction = list()
    all_label = list()
    f1score_per_image = list()
    all_score = list()

    for i_image, (image_file, label_file) in enumerate(
            zip(test_image_filenames, test_label_filenames)):
        tick = time.time()

        basename = os.path.basename(image_file[0])
        basename = os.path.splitext(basename)[0]
        image_file = os.path.join(object_folder, 'result', basename + '.mat')

        # Read in result and label
        mat_content = matlab.load(image_file)
        score = mat_content['mask']
        prediction = score > 0.5
        prediction = prediction.astype('float')

        label = KSimage.imread(label_file[0])
        label = label.astype('float')
        label = label / 255.0
        label = label > 0.5
        label = label.astype('float')

        prediction = np.reshape(prediction, -1)
        label = np.reshape(label, -1)
        score = np.reshape(score, -1)

        all_prediction.append(prediction)
        all_label.append(label)
        all_score.append(score)

        f1score = metrics.f1_score(label, prediction, average='binary')
        f1score_per_image.append(f1score)

        duration = time.time() - tick
        print('evaluate %d / %d (%.2f sec)' %
              (i_image + 1, len(test_image_filenames), duration))

    # concatenate the per-image vectors (robust even when images differ in size,
    # where np.array would produce a ragged object array)
    all_label = np.concatenate(all_label)
    all_prediction = np.concatenate(all_prediction)
    all_score = np.concatenate(all_score)

    total_f1score = metrics.f1_score(all_label, all_prediction, average='binary')
    avg_f1score = np.mean(f1score_per_image)
    average_precision = metrics.average_precision_score(all_label, all_score,
                                                        average='micro')

    return total_f1score, avg_f1score, average_precision, f1score_per_image
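
# Illustrative sketch (not part of the pipeline): how the aggregate metrics in
# evaluate() behave, using only numpy and scikit-learn on toy arrays. All
# values and names below are made up.
def _sketch_evaluate_metrics():
    import numpy as np
    from sklearn import metrics

    # two "images" worth of flattened per-pixel scores and binary labels
    scores = [np.array([0.9, 0.2, 0.7]), np.array([0.1, 0.8])]
    labels = [np.array([1., 0., 1.]), np.array([0., 1.])]

    # per-image F1, as in the loop above
    per_image_f1 = [
        metrics.f1_score(l, (s > 0.5).astype('float'), average='binary')
        for s, l in zip(scores, labels)
    ]

    # pooled metrics, as computed after the loop
    all_score = np.concatenate(scores)
    all_label = np.concatenate(labels)
    all_pred = (all_score > 0.5).astype('float')
    total_f1 = metrics.f1_score(all_label, all_pred, average='binary')
    avg_f1 = np.mean(per_image_f1)
    ap = metrics.average_precision_score(all_label, all_score, average='micro')
    return total_f1, avg_f1, ap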
def inputs2(object_folder, mode, flags, mat_contents):
    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')
    log_list = KScsv.read_csv(log_file_path)

    # e.g. key_list = ['HE', 'DAPI', 'weight']
    key_list = list(flags['dict_path'].keys())[:-1]  # all keys except the last: 'group'

    # Gather, for each key, its column of file paths from the log.
    allimageslist = []
    for ind, key in enumerate(key_list):
        image_dict = collections.defaultdict(list)
        for row in log_list:
            image_dict[key].append(row[ind])
        allimageslist.append(image_dict[key])

    min_queue_examples = int(
        len(allimageslist[0]) * flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    # Assumes read_data / process_image_and_label / generate_batch share the
    # signatures used by the companion inputs(object_folder, mode, flags,
    # mat_contents) in this module.
    filename_queue = tf.train.slice_input_producer(allimageslist, shuffle=True)
    queue_dict, label_dict = read_data(filename_queue, flags)
    processed_dict = process_image_and_label(queue_dict, mat_contents, flags)

    # Generate a batch of images and labels by building up a queue of examples.
    batch_list, label = generate_batch(processed_dict,
                                       label_dict,
                                       min_queue_examples,
                                       int(flags['batch_size']),
                                       shuffle=False,
                                       flags=flags)

    out_image = {key: batch_list[ind] for ind, key in enumerate(key_list)}
    return {'images': out_image, 'labels': label}
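
# Minimal sketch of the key_list derivation above. It assumes
# flags['dict_path'] preserves insertion order with 'group' last (guaranteed
# for plain dicts in Python 3.7+; use collections.OrderedDict otherwise).
# Paths here are made up.
def _sketch_key_list():
    flags = {'dict_path': {'HE': '/data/HE', 'DAPI': '/data/DAPI',
                           'weight': '/data/weight', 'group': '/data/group'}}
    key_list = list(flags['dict_path'].keys())[:-1]
    assert key_list == ['HE', 'DAPI', 'weight']
    return key_list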
def main(argv):
    he_log_file = argv[0]
    he_dcis_segmentation_result_path = argv[1]
    igpu = argv[2]

    row_list = KScsv.read_csv(he_log_file)
    main_he_dcis_segmentation.main(1, 'test_model', flags_he_dcis_segmentation,
                                   row_list, he_dcis_segmentation_result_path,
                                   igpu)
def main(argv):
    file_list = argv[0]
    result_path = argv[1]
    he_dcis_segmentation_result_path = argv[2]
    igpu = argv[3]

    row_list = KScsv.read_csv(file_list)
    main_probe_detection.main(1, 'test_model', flags_probe_detection_green,
                              row_list, result_path,
                              he_dcis_segmentation_result_path, igpu)
def main(argv):
    file_list = argv[0]
    he_cell_segmentation_result_path = argv[1]
    he_dcis_segmentation_result_path = argv[2]
    igpu = argv[3]

    row_list = KScsv.read_csv(file_list)
    main_he_cell_segmentation.main(1, 'test_model', flags_he_cell_segmentation,
                                   row_list, he_cell_segmentation_result_path,
                                   he_dcis_segmentation_result_path, igpu)
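
# The three main(argv) functions above are thin command-line adapters: each
# reads a CSV list of inputs and forwards it to the corresponding pipeline's
# main(1, 'test_model', flags, ...) entry point. A hypothetical way to wire
# one up (the script name is made up):
#
#     if __name__ == '__main__':
#         import sys
#         main(sys.argv[1:])
#         # e.g. python run_he_cell_segmentation.py file_list.csv \
#         #        cell_seg_results/ dcis_seg_results/ 0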
def inputs(mean_image, variance_image, object_folder, mode, flags):
    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')
    log_list = KScsv.read_csv(log_file_path)

    # Bucket file paths by class label so each class feeds its own queue.
    image_dict = collections.defaultdict(list)
    label_dict = collections.defaultdict(list)
    for row in log_list:
        for i_class in range(flags['n_classes']):
            if int(row[2]) == i_class:
                image_dict[i_class].append(row[0])
                label_dict[i_class].append(int(row[2]))

    min_queue_examples = int(
        np.sum([len(image_dict[k]) for k in image_dict.keys()]) *
        flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    combine_image_dict = list()
    combine_label_dict = list()
    for i_class in range(flags['n_classes']):
        filename_queue = tf.train.slice_input_producer(
            [image_dict[i_class], label_dict[i_class]], shuffle=True)
        image, label = read_data(filename_queue, flags)
        image = tf.cast(image, tf.float32)
        label = tf.cast(label, tf.float32)
        image, label = process_image_and_label(image, label, mean_image,
                                               variance_image, flags)

        # Generate a batch of images and labels by building up a queue of examples.
        image, label = generate_batch(image, label, min_queue_examples,
                                      int(flags['batch_size'] / flags['n_classes']),
                                      shuffle=False, flags=flags)
        combine_image_dict.append(image)
        combine_label_dict.append(label)

    # (values, axis) argument order, per TensorFlow >= 1.0
    out_image = tf.concat(combine_image_dict, 0)
    out_label = tf.concat(combine_label_dict, 0)
    return {'images': out_image, 'labels': out_label}
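
# Toy, numpy-only sketch of the class-balancing idea in inputs() above: one
# pool (queue) per class, batch_size / n_classes examples drawn from each,
# concatenated so every batch is class-balanced regardless of pool sizes.
# The pools and sizes here are made up.
def _sketch_balanced_batch():
    import numpy as np
    rng = np.random.RandomState(0)
    per_class_pools = {0: np.arange(100), 1: np.arange(100, 130)}  # imbalanced
    batch_size, n_classes = 8, 2
    per_class = batch_size // n_classes
    batch = np.concatenate([
        rng.choice(pool, size=per_class, replace=True)
        for pool in per_class_pools.values()
    ])
    return batch  # always per_class examples of each class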
def select_train_val_instances(nth_fold, method, flags):
    """
    select_train_val_instances balances the class instances found in the
    training and validation sets.

    param: nth_fold
    param: method
    return: void
    """
    # check if log files exist
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold) in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    train_log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    val_log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')
    if not os.path.isfile(train_log_file_path):
        raise ValueError('no ' + train_log_file_path)
    if not os.path.isfile(val_log_file_path):
        raise ValueError('no ' + val_log_file_path)

    # read csv
    train_log = KScsv.read_csv(train_log_file_path)
    val_log = KScsv.read_csv(val_log_file_path)

    # balance the class counts
    if method == 'by_numbers':
        train_log = select_instances.by_numbers(train_log)
        val_log = select_instances.by_numbers(val_log)
        KScsv.write_csv(train_log, train_log_file_path)
        KScsv.write_csv(val_log, val_log_file_path)
    else:
        raise ValueError('no method ' + method + ' exists!')
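
# Hypothetical sketch of what a 'by_numbers' balancing step could do.
# select_instances.by_numbers is an external helper whose real behaviour may
# differ; this version simply downsamples every class to the size of the
# smallest class.
def _sketch_balance_by_numbers(log_rows):
    import collections
    import random
    by_class = collections.defaultdict(list)
    for row in log_rows:              # row: (image_path, label_path, class_id)
        by_class[row[2]].append(row)
    n_min = min(len(rows) for rows in by_class.values())
    balanced = []
    for rows in by_class.values():
        balanced.extend(random.sample(rows, n_min))
    return balanced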
def main(nth_fold, mode, flags, testdir):
    """
    main trains, tests, or runs the model on the provided data according to
    the specified mode.

    param: nth_fold
    param: mode
    param: experiment_folder
    param: image_ext
    param: test_model
    param: test_image_list
    return: saves segmentation results to the appropriate file/directory
    """
    # check if cv or perm
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold) in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    def select_model_path():
        # Pick the checkpoint for the requested epoch, ignoring pretrain files.
        checkpointlist = glob.glob(
            os.path.join(object_folder, 'checkpoint', 'model*meta'))
        checkpointlist = [f for f in checkpointlist if 'pretrain' not in f]
        steps = []
        for filepath in checkpointlist:
            basename = os.path.basename(filepath)
            steps.append(int(float(basename.split('-')[-1].split('.')[0])))
        steps = np.sort(steps)
        model_name = 'model.ckpt-' + str(steps[flags['test_model']])
        print('use epoch %d : model %s' % (flags['test_model'], model_name))
        return os.path.join(object_folder, 'checkpoint', model_name)

    # Train model
    if mode == 'train':
        checkpoint_folder = os.path.join(object_folder, 'checkpoint')
        network_stats_file_path = os.path.join(checkpoint_folder, 'network_stats.mat')
        train_images_folder = os.path.join(object_folder, 'train', 'image')
        if not os.path.isfile(network_stats_file_path):
            list_images = glob.glob(
                os.path.join(train_images_folder, '*' + flags['image_ext']))
            print('calculating mean and variance image')
            mean_image, variance_image = utils.calculate_mean_variance_image(list_images)
            routine.create_dir(checkpoint_folder)
            matlab.save(network_stats_file_path,
                        {'mean_image': mean_image, 'variance_image': variance_image})
        tf_model_train.train(object_folder, flags)

    # Test model on validation set
    elif mode == 'test_model':
        model_path = select_model_path()
        test_images_list = flags['test_image_list']
        filename_list = KScsv.read_csv(test_images_list)
        tf_model_test.test(object_folder, model_path, filename_list, flags)

    # Segment WSIs
    elif mode == 'test_WSI':
        model_path = select_model_path()
        # iterate over all subdirectories, sorted into ascending numerical order
        paths = get_immediate_subdirectories(testdir)
        paths.sort()
        # paths = paths[100:]  # TODO: enable based on which batch this code is running
        print("TEST DIR: " + str(testdir))
        for path in paths:
            print(os.path.join(testdir, path))
            # skip WSIs whose output directory already exists
            if not os.path.isdir(os.path.join(testdir, path + 'epiStromalSeg')):
                tf_model_test.testWSI(object_folder, model_path,
                                      os.path.join(testdir, path), flags)
            # TODO: uncomment to process only controls
            # imageCSV = open(os.path.join('/data', 'avellal14', 'WSI_patches',
            #                              'BBD_NCC_Covariate_Outcome_KK_JH_modifiedWithPaths.csv'), 'rb')
            # reader = csv.reader(imageCSV)
            # csvList = list(reader)
            # patientId = path[:path.index('_')]
            # caseControlList = next(subl for subl in csvList if patientId in subl)
            # TODO: uncomment to process only cases
            # if caseControlList[1] == '1':  # only test the WSI if the image is a case (1)
            #     tf_model_test.testWSI(object_folder, model_path,
            #                           os.path.join(testdir, path), flags)

    # Segment WSIs at patient level using data from CSV
    elif mode == 'test_Case_Control':
        model_path = select_model_path()
        with open(os.path.join('/home', 'avellal14', 'data', 'Adithya_BBD_NHS',
                               'NHS_BBD_CODE',
                               'casesAndMatchedControls224.csv')) as csvFile:
            csvReader = csv.DictReader(csvFile)
            for row in csvReader:
                if row['path'] in ('BBD_NCC_extractedat20x',
                                   'BBD_NCC_extractedat20x_round2'):
                    testdir = os.path.join('/home', 'avellal14', 'data',
                                           'Adithya_BBD_NHS', row['path'])
                    paths = get_subdirectories_by_patient(testdir, row['id'])
                    for path in paths:
                        print('CURRENT WSI BEING SEGMENTED',
                              os.path.join(testdir, path))
                        # skip WSIs whose output directory already exists
                        if not os.path.isdir(os.path.join(testdir, path + '_cellSeg')):
                            tf_model_test.testWSI(object_folder, model_path,
                                                  os.path.join(testdir, path), flags)
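
# Standalone sketch of the checkpoint-selection logic used by the test modes
# above: extract the global step from each 'model.ckpt-<step>.meta' filename,
# sort numerically, and index with flags['test_model']. The filenames below
# are made up.
def _sketch_select_checkpoint():
    import os
    import numpy as np
    checkpointlist = ['ckpt/model.ckpt-100.meta',
                      'ckpt/model.ckpt-2000.meta',
                      'ckpt/model.ckpt-500.meta']
    steps = [int(float(os.path.basename(f).split('-')[-1].split('.')[0]))
             for f in checkpointlist]
    steps = np.sort(steps)                 # [100, 500, 2000]
    return 'model.ckpt-' + str(steps[-1])  # test_model = -1 -> latest checkpoint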
def gen_train_val_data(nth_fold, flags):
    """
    gen_train_val_data generates the training and validation data used to
    train the network. It builds the train and val directories, extracts
    patches according to the provided method, and maintains a log file
    recording the contents of each data split.

    param: nth_fold
    param: method: sliding_window
    return: void
    """
    ########## check whether 'cv' or 'perm' exists and which one to use ##########
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold) in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'], 'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    ########## create train and val paths ##########
    path_dict = dict()
    path_dict['train_folder'] = os.path.join(object_folder, 'train')
    path_dict['val_folder'] = os.path.join(object_folder, 'val')
    create_dir(path_dict['train_folder'])
    create_dir(path_dict['val_folder'])

    ########## extract patches and put in a designated directory ##########
    if flags['gen_train_val_method'] == 'sliding_window':
        key_list = ['image', 'groundtruth', 'weight']
        for key in key_list:
            path_dict['train_' + key + '_folder'] = os.path.join(
                path_dict['train_folder'], key)
            create_dir(path_dict['train_' + key + '_folder'])
            path_dict['val_' + key + '_folder'] = os.path.join(
                path_dict['val_folder'], key)
            create_dir(path_dict['val_' + key + '_folder'])

        list_dict = dict()
        for key in key_list:
            list_dict['train_' + key + '_list'] = KScsv.read_csv(
                os.path.join(object_folder, 'train_' + key + '_list.csv'))
            list_dict['val_' + key + '_list'] = KScsv.read_csv(
                os.path.join(object_folder, 'val_' + key + '_list.csv'))

        ########## extract patches for train and val ##########
        for key in ['train', 'val']:
            if not os.path.isfile(
                    os.path.join(path_dict[key + '_folder'], key + '_log.csv')):
                log_data = list()
                for i_image in range(len(list_dict[key + '_image_list'])):
                    tic = time.time()
                    path_image = list_dict[key + '_image_list'][i_image][0]
                    path_groundtruth = list_dict[key + '_groundtruth_list'][i_image][0]
                    path_weight = list_dict[key + '_weight_list'][i_image][0]

                    # Resize image, groundtruth, and weight from the 10x input
                    # size to 2.5x (the level at which the network operates).
                    image = KSimage.imread(path_image)
                    image = KSimage.imresize(image, 0.25)
                    groundtruth = KSimage.imread(path_groundtruth)
                    groundtruth = KSimage.imresize(groundtruth, 0.25)
                    weight = KSimage.imread(path_weight)
                    weight = KSimage.imresize(weight, 0.25)

                    # make sure that groundtruth images have depth = 1
                    if len(groundtruth.shape) > 2 and groundtruth.shape[2] > 1:
                        groundtruth = groundtruth[:, :, 1]

                    # remove all intra-stromal epithelium labels and set them to stroma
                    groundtruth[groundtruth == 3] = 2
                    # fat label was originally 4 but is now changed to 3
                    groundtruth[groundtruth == 4] = 3

                    dict_obj = {'image': image,
                                'groundtruth': groundtruth,
                                'weight': weight}
                    extractor = extract_patches.sliding_window(
                        dict_obj, flags['size_input_patch'],
                        flags['size_output_patch'], flags['stride'])

                    for j, (out_obj_dict, coord_dict) in enumerate(extractor):
                        images = out_obj_dict['image']
                        groundtruths = out_obj_dict['groundtruth']
                        weights = out_obj_dict['weight']
                        coord_images = coord_dict['image']

                        basename = os.path.basename(path_image)
                        basename = os.path.splitext(basename)[0]
                        suffix = ('_idx' + str(j) +
                                  '_row' + str(coord_images[0]) +
                                  '_col' + str(coord_images[1]))
                        image_name = os.path.join(
                            path_dict[key + '_image_folder'],
                            basename + suffix + flags['image_ext'])
                        label_name = os.path.join(
                            path_dict[key + '_groundtruth_folder'],
                            basename + suffix + flags['groundtruth_ext'])
                        weight_name = os.path.join(
                            path_dict[key + '_weight_folder'],
                            basename + suffix + flags['weight_ext'])

                        if not os.path.isfile(image_name):
                            KSimage.imwrite(images, image_name)
                        if not os.path.isfile(label_name):
                            KSimage.imwrite(groundtruths, label_name)
                        if not os.path.isfile(weight_name):
                            KSimage.imwrite(weights, weight_name)

                        log_data.append((image_name, label_name, weight_name))

                    print('finish processing %d image from %d images : %.2f' %
                          (i_image + 1, len(list_dict[key + '_image_list']),
                           time.time() - tic))

                KScsv.write_csv(
                    log_data,
                    os.path.join(path_dict[key + '_folder'], key + '_log.csv'))
    else:
        print('Only the sliding_window method is supported; training terminated.')
        return
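
# Toy sketch of sliding-window patch extraction as used above: yield the
# top-left coordinate of each patch over the image with a fixed stride, so
# each patch can be cropped and written out. The real
# extract_patches.sliding_window also handles the groundtruth/weight channels
# and separate input/output patch geometry; shapes below are made up.
def _sketch_sliding_window(image_shape=(100, 120), patch=(32, 32), stride=(16, 16)):
    coords = []
    for row in range(0, image_shape[0] - patch[0] + 1, stride[0]):
        for col in range(0, image_shape[1] - patch[1] + 1, stride[1]):
            coords.append((row, col))  # crop = image[row:row+32, col:col+32]
    return coords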
def split_perm(obj_list, flags):
    """
    split_perm splits the data using random permutations, stratified by the
    group label.

    param: images_list
    param: labels_list
    param: groups_list
    param: num
    param: test_percentage
    param: val_percentage
    return: void
    """
    num = flags['num_split']
    test_percentage = flags['test_percentage']
    val_percentage = flags['val_percentage']

    groups_label = list()
    for file in obj_list['group']:
        row = KScsv.read_csv(file)
        groups_label.append(row[0][0])
    groups_label = np.array(groups_label)

    for key in obj_list.keys():
        obj_list[key] = np.array(obj_list[key])

    def match_and_write(split_dict, prefix, cv_folder):
        # Match each image in the split against the files on disk for every
        # key (image, groundtruth, weight, ...) and write the per-key lists.
        dict_path = flags['dict_path']
        dict_ext = flags['dict_ext']
        obj_list_dict = dict()
        for key in dict_path.keys():
            obj_list_dict[key] = glob.glob(
                os.path.join(dict_path[key], '*' + dict_ext[key]))

        temp_dict = collections.defaultdict(list)
        for name in split_dict['image']:
            basename = os.path.splitext(os.path.basename(name))[0]
            matching = sorted([s for s in obj_list_dict['image'] if basename in s])
            for m in matching:
                basename = os.path.splitext(os.path.basename(m))[0]
                basename_dict = dict()
                for key in split_dict.keys():
                    basename_dict[key] = os.path.join(dict_path[key],
                                                      basename + dict_ext[key])
                # keep the sample only if every modality exists on disk
                if all(basename_dict[k] in obj_list_dict[k]
                       for k in basename_dict.keys()):
                    for key in split_dict.keys():
                        temp_dict[key].append(basename_dict[key])

        for key in split_dict.keys():
            split_dict[key] = np.array(temp_dict[key])
            filename = os.path.join(cv_folder, prefix + '_' + key + '_list.csv')
            if not os.path.isfile(filename):
                row_list = [[item] for item in split_dict[key]]
                KScsv.write_csv(row_list, filename)

    def split_train_val(train_obj_list_dict, train_groups_label, cv_folder):
        sss = StratifiedShuffleSplit(n_splits=1, test_size=val_percentage / 100.0)
        for train_train_index, train_val_index in sss.split(
                train_obj_list_dict['image'], train_groups_label):
            train_train_obj_list_dict = dict()
            train_val_obj_list_dict = dict()
            for key in train_obj_list_dict.keys():
                train_train_obj_list_dict[key] = train_obj_list_dict[key][train_train_index]
                train_val_obj_list_dict[key] = train_obj_list_dict[key][train_val_index]
            match_and_write(train_train_obj_list_dict, 'train', cv_folder)
            match_and_write(train_val_obj_list_dict, 'val', cv_folder)

    if test_percentage != 0:
        skf = StratifiedShuffleSplit(n_splits=num, test_size=test_percentage / 100.0)
        for i_num, (train_idx, test_idx) in enumerate(
                skf.split(obj_list['image'], groups_label)):
            cv_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(i_num + 1))
            create_dir(cv_folder)

            test_obj_list_dict = dict()
            train_obj_list_dict = dict()
            for key in obj_list.keys():
                test_obj_list_dict[key] = obj_list[key][test_idx]
                train_obj_list_dict[key] = obj_list[key][train_idx]
            train_groups_label = groups_label[train_idx]

            # test lists are written directly, without the matching step
            for key in test_obj_list_dict.keys():
                filename = os.path.join(cv_folder, 'test_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item] for item in test_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            split_train_val(train_obj_list_dict, train_groups_label, cv_folder)
    else:
        # no held-out test set: split everything into train and validation
        for i_num in range(num):
            cv_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(i_num + 1))
            create_dir(cv_folder)
            train_obj_list_dict = dict()
            for key in obj_list.keys():
                train_obj_list_dict[key] = obj_list[key]
            split_train_val(train_obj_list_dict, groups_label, cv_folder)
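
# Minimal scikit-learn sketch of the split strategy in split_perm(): an outer
# stratified shuffle split carves out the test set, then an inner one splits
# the remainder into train and validation. Sample names and group labels are
# toy values.
def _sketch_perm_split():
    import numpy as np
    from sklearn.model_selection import StratifiedShuffleSplit

    images = np.array(['im%d' % i for i in range(10)])
    groups = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    outer = StratifiedShuffleSplit(n_splits=1, test_size=0.2)
    for train_idx, test_idx in outer.split(images, groups):
        inner = StratifiedShuffleSplit(n_splits=1, test_size=0.25)
        for tr_idx, val_idx in inner.split(images[train_idx], groups[train_idx]):
            return (images[train_idx][tr_idx],   # train
                    images[train_idx][val_idx],  # validation
                    images[test_idx])            # test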
def split_cv(obj_list, flags):
    """
    split_cv splits the data into train, validation, and test sets,
    stratified by the group label.

    param: images_list
    param: labels_list
    param: groups_list
    param: num
    param: val_percentage
    return: void
    """
    num = flags['num_split']
    val_percentage = flags['val_percentage']

    groups_label = list()
    for file in obj_list['group']:
        row = KScsv.read_csv(file)
        groups_label.append(row[0][0])
    groups_label = np.array(groups_label)

    for key in obj_list.keys():
        obj_list[key] = np.array(obj_list[key])

    skf = StratifiedKFold(n_splits=num)
    for i_num, (train_idx, test_idx) in enumerate(
            skf.split(obj_list['image'], groups_label)):
        cv_folder = os.path.join(flags['experiment_folder'], 'cv' + str(i_num + 1))
        create_dir(cv_folder)

        test_obj_list_dict = dict()
        train_obj_list_dict = dict()
        for key in obj_list.keys():
            test_obj_list_dict[key] = obj_list[key][test_idx]
            train_obj_list_dict[key] = obj_list[key][train_idx]
        train_groups_label = groups_label[train_idx]

        sss = StratifiedShuffleSplit(n_splits=1, test_size=val_percentage / 100.0)
        for train_train_index, train_val_index in sss.split(
                train_obj_list_dict['image'], train_groups_label):
            train_train_obj_list_dict = dict()
            train_val_obj_list_dict = dict()
            for key in train_obj_list_dict.keys():
                train_train_obj_list_dict[key] = train_obj_list_dict[key][train_train_index]
                train_val_obj_list_dict[key] = train_obj_list_dict[key][train_val_index]

            #################################################################
            # test
            for key in test_obj_list_dict.keys():
                filename = os.path.join(cv_folder, 'test_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item] for item in test_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            #################################################################
            # train
            for key in train_train_obj_list_dict.keys():
                filename = os.path.join(cv_folder, 'train_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item] for item in train_train_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            #################################################################
            # validation
            for key in train_val_obj_list_dict.keys():
                filename = os.path.join(cv_folder, 'val_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item] for item in train_val_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)
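
# Companion sketch for split_cv(): StratifiedKFold assigns every sample to the
# test set exactly once across the num folds, unlike the repeated random draws
# of split_perm(). Toy labels again.
def _sketch_cv_split(num=5):
    import numpy as np
    from sklearn.model_selection import StratifiedKFold

    images = np.array(['im%d' % i for i in range(10)])
    groups = np.array([0, 1] * 5)
    folds = []
    for train_idx, test_idx in StratifiedKFold(n_splits=num).split(images, groups):
        folds.append((images[train_idx], images[test_idx]))
    return folds  # num folds; each image appears in exactly one test split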
def inputs(object_folder, mode, flags, mat_contents):
    # e.g. key_list = ['HE', 'DAPI', 'label']
    key_list = list(flags['dict_path'].keys())
    key_list.remove('group')

    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')
    log_list = KScsv.read_csv(log_file_path)

    slice_input_list = []
    for ind, key in enumerate(key_list):
        key_img_list = []

        # normalise the shape of the mean and variance images to H x W x C
        mean_img = np.float32(mat_contents[key + '_mean'])
        var_img = np.float32(mat_contents[key + '_var'])
        if mean_img.ndim == 2:
            mean_img = np.expand_dims(mean_img, axis=2)
        if var_img.ndim == 2:
            var_img = np.expand_dims(var_img, axis=2)

        for row in log_list:
            key_img_list.append(row[ind])
        slice_input_list.append(key_img_list)

    min_queue_examples = int(
        len(slice_input_list[0]) * flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.slice_input_producer(slice_input_list, shuffle=True)
    queue_dict, label_dict = read_data(filename_queue, flags)
    processed_dict = process_image_and_label(queue_dict, mat_contents, flags)

    # Generate a batch of images and labels by building up a queue of examples.
    batch_list, label = generate_batch(processed_dict,
                                       label_dict,
                                       min_queue_examples,
                                       int(flags['batch_size']),
                                       shuffle=False,
                                       flags=flags)

    # combine the per-key batches into the final dicts
    combine_image_dict = {}
    combine_label_dict = {}
    for ind, key in enumerate(key_list):
        combine_image_dict[key] = batch_list[ind]
        combine_label_dict[key] = label

    return combine_image_dict, combine_label_dict
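
# Numpy sketch of the H x W -> H x W x 1 shape fix above: a single-channel
# mean image must gain an explicit channel axis before it can broadcast
# against an H x W x C image tensor. Values are toy data.
def _sketch_mean_var_shapes():
    import numpy as np
    mean_img = np.float32(np.random.rand(64, 64))   # grayscale, 2-D
    if mean_img.ndim == 2:
        mean_img = np.expand_dims(mean_img, axis=2)  # -> (64, 64, 1)
    image = np.float32(np.random.rand(64, 64, 1))
    normalised = image - mean_img                    # broadcasts cleanly
    return normalised.shape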
def inputs(mean_image, variance_image, object_folder, mode, flags):
    """
    inputs reads either the training or validation examples, applies
    aggressive data augmentation and normalisation via
    process_image_and_label, and assembles them into a mini-batch to be
    passed through the network.

    param: mean_image
    param: variance_image
    param: object_folder
    param: mode
    param: min_fraction_of_examples_in_queue
    param: batch_size
    return: image dict, label dict, weight dict
    """
    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')
    log_list = KScsv.read_csv(log_file_path)

    image_dict = collections.defaultdict(list)
    label_dict = collections.defaultdict(list)
    weight_dict = collections.defaultdict(list)
    for row in log_list:
        image_dict['image'].append(row[0])
        label_dict['label'].append(row[1])
        weight_dict['weight'].append(row[2])

    min_queue_examples = int(
        len(image_dict['image']) * flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    combine_image_dict = list()
    combine_label_dict = list()
    combine_weight_dict = list()

    filename_queue = tf.train.slice_input_producer(
        [image_dict['image'], label_dict['label'], weight_dict['weight']],
        shuffle=True)
    image, label, weight = read_data(filename_queue, flags)
    image = tf.cast(image, tf.float32)
    label = tf.cast(label, tf.float32)
    weight = tf.cast(weight, tf.float32)
    image, label, weight = process_image_and_label(image, label, weight,
                                                   mean_image, variance_image,
                                                   flags)

    # Generate a batch of images and labels by building up a queue of examples.
    image, label, weight = generate_batch(image, label, weight,
                                          min_queue_examples,
                                          int(flags['batch_size']),
                                          shuffle=False, flags=flags)
    combine_image_dict.append(image)
    combine_label_dict.append(label)
    combine_weight_dict.append(weight)

    out_image = tf.concat(combine_image_dict, 0)
    out_label = tf.concat(combine_label_dict, 0)
    out_weight = tf.concat(combine_weight_dict, 0)

    return {'images': out_image, 'labels': out_label, 'weights': out_weight}
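
# End-to-end sketch of the TF 1.x queue pattern used by the inputs()
# functions above, runnable on its own under TensorFlow 1.x
# (slice_input_producer/batch were removed in TF 2.x in favour of tf.data).
# To keep it self-contained, the "data" is just the path strings themselves;
# no image files are read.
def _sketch_queue_pipeline():
    import tensorflow as tf  # assumes TensorFlow 1.x

    paths = ['a.png', 'b.png', 'c.png', 'd.png']
    labels = [0, 1, 0, 1]

    # one dequeue yields a single (path, label) slice, shuffled
    queue = tf.train.slice_input_producer([paths, labels], shuffle=True)
    path_t, label_t = queue[0], queue[1]

    # assemble slices into mini-batches via a background queue
    batch = tf.train.batch([path_t, label_t], batch_size=2)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        result = sess.run(batch)
        coord.request_stop()
        coord.join(threads)
    return result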