def __init__(self, outdir, pneumonia_path, test_persons=None):
    self.outdir = outdir
    self.pneumonia_path = pneumonia_path
    self.resize = int(args.resize_img)
    # Patient IDs reserved for the test split, per class.
    self.test_dict_persons = {
        'pneumonia': ['8', '31'],
        'COVID-19': ['19', '20', '36', '42', '86'],
        'normal': []
    }
    if test_persons is not None:
        self.test_dict_persons.update(test_persons)
    utils.mdir(os.path.join(self.outdir, 'train_data'))
    # Per-class counters for the train split, the test split and the full dataset.
    self.train_count = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}
    self.test_count = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}
    self.data_counts = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}
    # Collapse the source finding labels into the three target classes.
    self.mapping = {
        'COVID-19': 'COVID-19',
        'SARS': 'pneumonia',
        'MERS': 'pneumonia',
        'Streptococcus': 'pneumonia',
        'Normal': 'normal',
        'Lung Opacity': 'pneumonia',
        '1': 'pneumonia'
    }
    self.pneumonias = ["COVID-19", "SARS", "MERS", "ARDS", "Streptococcus"]
    self.pathologies = [
        "Pneumonia", "Viral Pneumonia", "Bacterial Pneumonia", "No Finding"
    ] + self.pneumonias
    print('pathologies:{}'.format(sorted(self.pathologies)))
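# Hedged sketch (not part of the original class): one way the label mapping and the
# reserved patient IDs initialised above could be used to route a single metadata
# record into the train or test split while keeping the counters up to date. The
# method name, `finding` and `patient_id` are illustrative assumptions; the real
# split logic lives elsewhere in this class.
def _assign_split_sketch(self, finding, patient_id):
    label = self.mapping.get(finding)        # collapse the finding into one of the 3 classes
    if label is None:
        return None                          # finding not used by this dataset
    if patient_id in self.test_dict_persons[label]:
        self.test_count[label] += 1          # patient reserved for the test split
        split = 'test'
    else:
        self.train_count[label] += 1
        split = 'train'
    self.data_counts[label] += 1
    return split, label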
def transform_to_tfrecords(data_path, name: str, channels=None, num_records=10):
    """
    Iterate over all compressed data files for a split and convert them into TFRecord files.

    :param data_path: preprocessing folder
    :param name: split name, 'train' or 'test' only
    :param channels: number of image channels (defaults to 1)
    :param num_records: number of TFRecord files to write
    :return: None
    """
    if str(data_path).endswith('/'):
        data_path = data_path[:-1]
    if channels is None:
        channels = 1
    filenames = glob.glob('{}/{}_*.pz'.format(data_path, name))
    facialexpr_data = []
    for fn in tqdm(filenames):
        data = utils.load(fn)
        facialexpr_data.extend(data)
    print('number of observations for {}: {}'.format(name, len(facialexpr_data)))
    samples_per_tf_record = len(facialexpr_data) // num_records
    # Start index of each TFRecord shard.
    tf_parts = [(k * samples_per_tf_record) for k in range(len(facialexpr_data))
                if (k * samples_per_tf_record) < len(facialexpr_data)]
    utils.mdir(os.path.join(data_path, 'training'))
    for i, j in enumerate(tf_parts):
        out_fn = os.path.join(
            data_path, 'training',
            '{}_{:03d}-{:03d}.tfrecord'.format(name, i + 1, num_records))
        _process_examples(facialexpr_data[j:(j + samples_per_tf_record)],
                          out_fn, channels=channels)
    return None
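# Hedged sketch of the `_process_examples` helper referenced above, which is not
# shown in this file. It assumes each sample is a dict holding a uint8 image array
# under 'img' and an integer class index under 'label'; the real helper may use
# different keys and features.
import numpy as np
import tensorflow as tf

def _process_examples(samples, out_fn, channels=1):
    with tf.io.TFRecordWriter(out_fn) as writer:
        for sample in samples:
            img = np.asarray(sample['img'], dtype=np.uint8)
            feature = {
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[img.tobytes()])),
                'height': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[img.shape[0]])),
                'width': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[img.shape[1]])),
                'channels': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[channels])),
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(sample['label'])])),
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())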
def generate_example_sets(data_path, name, chunk_lenght=1000):
    """
    Preprocess the dataset and save the train, test and eval splits as chunk files
    under a dedicated folder.

    :param data_path: root data directory
    :param name: name of the output folder
    :param chunk_lenght: number of samples per chunk file
    :return: None
    """
    train, test, evaluation = preprocess_fer_dataset(data_path)
    out_dir = os.path.join(data_path, name)
    utils.mdir(out_dir)
    generate_chunks(train, name='train', out_dir=out_dir, chunk_lenght=chunk_lenght)
    generate_chunks(test, name='test', out_dir=out_dir, chunk_lenght=chunk_lenght)
    generate_chunks(evaluation, name='eval', out_dir=out_dir, chunk_lenght=chunk_lenght)
    return None
def generate_relabel_sets(data_path, name, chunk_lenght=1000):
    """Same as generate_example_sets, but built from the relabeled data."""
    train, test, evaluation = preprocess_relabeled_data(data_path)
    out_dir = os.path.join(data_path, name)
    utils.mdir(out_dir)
    generate_chunks(train, name='train', out_dir=out_dir, chunk_lenght=chunk_lenght)
    generate_chunks(test, name='test', out_dir=out_dir, chunk_lenght=chunk_lenght)
    generate_chunks(evaluation, name='eval', out_dir=out_dir, chunk_lenght=chunk_lenght)
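# Hedged usage sketch: how the two chunk-generation pipelines above might be driven.
# The root path and output folder names are illustrative assumptions, not values
# taken from the source.
if __name__ == '__main__':
    data_root = './data/fer2013'                                      # assumed dataset location
    generate_example_sets(data_root, name='chunks', chunk_lenght=1000)
    generate_relabel_sets(data_root, name='chunks_relabel', chunk_lenght=1000)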
def __init__(self, model_name, data_path, explain=False, save_predictions=False, **kwargs):
    utils.setup_gpus()
    self.model_path = os.path.join('./trained_models', model_name, 'frozen')
    print('Loading model from: {}'.format(self.model_path))
    self.model_name = model_name
    self.data = DataLoader(data_path, training=False).test_dataset()
    self.model = tf.keras.models.load_model(self.model_path)
    self.explain = explain
    self.outdir = os.path.join('./trained_models', model_name, 'results')
    self.class_names = ['normal', 'pneumonia', 'COVID-19']
    self.save_predictions = save_predictions
    utils.mdir(self.outdir)
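# Hedged sketch (the real evaluation method is not shown here): one way this
# evaluator could turn the loaded test dataset into per-image class names. It
# assumes `self.data` yields (image_batch, label_batch) pairs; the method name
# is hypothetical.
def predict_classes_sketch(self):
    predictions = []
    for images, labels in self.data:
        probs = self.model.predict(images)               # (batch, 3) softmax scores
        for p in probs:
            predictions.append(self.class_names[int(p.argmax())])
    return predictions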
def generate_chunks(data, name, out_dir, chunk_lenght=1000):
    """
    Split a list of dictionaries into fixed-size chunks and save each chunk to disk.
    Chunking keeps the data portable for large-scale training strategies where you
    train from several files because the whole dataset cannot be loaded into memory
    at once (e.g. keras, pandas, etc.).

    :param data: list of dictionaries
    :param name: name prefix for the chunk files ('train', 'test' or 'eval')
    :param out_dir: output directory
    :param chunk_lenght: number of samples per chunk file
    :return: None (just to keep syntax)
    """
    # Start index of each chunk.
    parts = [(k * chunk_lenght) for k in range(len(data))
             if (k * chunk_lenght) < len(data)]
    print('files generated for {} set, with {} chunks'.format(name, len(parts)))
    if not os.path.exists(out_dir):
        utils.mdir(out_dir)
    for i, j in enumerate(parts):
        new_data = data[j:(j + chunk_lenght)]
        # Shuffle each chunk deterministically.
        random.seed(1)
        new_data = random.sample(new_data, len(new_data))
        fn = '{}_{:03d}-{}.data'.format(name, i + 1, len(parts))
        utils.save(os.path.join(out_dir, fn), new_data)
    print('training/evaluation batches saved in: {}'.format(out_dir))
    return None
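# Hedged sketch of the `utils.save` / `utils.load` helpers used throughout this
# file; they are not shown here. This assumes gzip-compressed pickles (the '.pz'
# files read by transform_to_tfrecords suggest compressed serialization), which
# may differ from the project's actual implementation.
import gzip
import pickle

def save(path, obj):
    with gzip.open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load(path):
    with gzip.open(path, 'rb') as f:
        return pickle.load(f)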
def create_dirs(self):
    log_dir = os.path.join(self.model_path, 'logs')
    ckpt_dir = os.path.join(self.model_path, 'checkpoints')
    # Separate summary writers for training and test metrics.
    train_writer = tf.summary.create_file_writer(os.path.join(log_dir, 'opt/train'))
    test_writer = tf.summary.create_file_writer(os.path.join(log_dir, 'opt/test'))
    utils.mdir(log_dir)
    utils.mdir(ckpt_dir)
    utils.mdir(os.path.join(self.model_path, 'weights'))
    return log_dir, ckpt_dir, train_writer, test_writer
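# Illustrative use of the writers returned by create_dirs: logging one scalar per
# step so it shows up in TensorBoard under opt/train and opt/test. The function
# name and the 'loss' metric are placeholders for the example.
import tensorflow as tf

def log_step_metrics(train_writer, test_writer, step, train_loss, test_loss):
    with train_writer.as_default():
        tf.summary.scalar('loss', train_loss, step=step)
    with test_writer.as_default():
        tf.summary.scalar('loss', test_loss, step=step)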
def __init__(self, data_path):
    self.data_path = data_path
    utils.mdir(os.path.join(data_path, 'preprocessing'))