    def __init__(self, outdir, pneumonia_path, test_persons=None):
        self.outdir = outdir
        self.pneumonia_path = pneumonia_path
        # `args` is assumed to be a module-level argparse namespace
        self.resize = int(args.resize_img)

        # patient IDs reserved for the test split, keyed by class
        self.test_dict_persons = {
            'pneumonia': ['8', '31'],
            'COVID-19': ['19', '20', '36', '42', '86'],
            'normal': []
        }
        if test_persons is not None:
            self.test_dict_persons.update(test_persons)

        utils.mdir(os.path.join(self.outdir, 'train_data'))

        # per-class sample counters for the train and test splits
        self.train_count = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}
        self.test_count = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}

        self.data_counts = {'normal': 0, 'pneumonia': 0, 'COVID-19': 0}
        # collapse raw dataset labels into the three training classes
        self.mapping = {
            'COVID-19': 'COVID-19',
            'SARS': 'pneumonia',
            'MERS': 'pneumonia',
            'Streptococcus': 'pneumonia',
            'Normal': 'normal',
            'Lung Opacity': 'pneumonia',
            '1': 'pneumonia'
        }

        self.pneumonias = ["COVID-19", "SARS", "MERS", "ARDS", "Streptococcus"]
        self.pathologies = [
            "Pneumonia", "Viral Pneumonia", "Bacterial Pneumonia", "No Finding"
        ] + self.pneumonias
        print('pathologies:{}'.format(sorted(self.pathologies)))
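For context, here is a rough sketch of how the mapping and the held-out patient IDs above are typically combined when walking the metadata; `assign_split` is a hypothetical helper, not part of the original class:

    def assign_split(self, finding, patient_id):
        # Hypothetical helper (an assumption, not shown in the source):
        # map a raw finding to one of the three training classes and route
        # the sample to train or test based on the held-out patient IDs.
        if finding not in self.mapping:
            return None, None  # unmapped findings are skipped
        label = self.mapping[finding]
        split = 'test' if str(patient_id) in self.test_dict_persons[label] else 'train'
        return label, split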
def transform_to_tfrecords(data_path, name: str, channels=None, num_records=10):
    """
    Iterate over the compressed chunk files of a split and convert them into TFRecord files.
    :param data_path: preprocessing folder containing the '{name}_*.pz' chunk files
    :param name: split to convert ('train' or 'test')
    :param channels: number of image channels (defaults to 1)
    :param num_records: target number of TFRecord files
    :return: None
    """
    data_path = str(data_path).rstrip('/')
    if channels is None:
        channels = 1
    filenames = glob.glob('{}/{}_*.pz'.format(data_path, name))
    samples = []
    for fn in tqdm(filenames):
        samples.extend(utils.load(fn))
    print('number of observations for {}: {}'.format(name, len(samples)))
    # starting offset of each TFRecord shard (guard against splits smaller than num_records)
    samples_per_tf_record = max(1, len(samples) // num_records)
    tf_parts = list(range(0, len(samples), samples_per_tf_record))
    utils.mdir(os.path.join(data_path, 'training'))
    for i, j in enumerate(tf_parts):
        out_fn = os.path.join(data_path, 'training',
                              '{}_{:03d}-{:03d}.tfrecord'.format(name, i + 1, len(tf_parts)))
        _process_examples(samples[j:(j + samples_per_tf_record)], out_fn, channels=channels)
    return None
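The `_process_examples` helper is not shown in these snippets. Below is a minimal sketch of what such a writer usually looks like, assuming each sample is a dict with 'image' (a uint8 numpy array) and 'label' (an int); the feature names and sample layout are assumptions:

import tensorflow as tf

def _process_examples(samples, out_fn, channels=1):
    # Hypothetical sketch of the writer used above; the feature names and the
    # sample layout ('image', 'label') are assumptions, not the original code.
    with tf.io.TFRecordWriter(out_fn) as writer:
        for sample in samples:
            image = sample['image']
            feature = {
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image.tobytes()])),
                'height': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[image.shape[0]])),
                'width': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[image.shape[1]])),
                'channels': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[channels])),
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[sample['label']])),
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())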
def generate_example_sets(data_path, name, chunk_length=1000):
    """
    Split the FER dataset into train/test/eval and save each split as chunked
    files under a named subfolder.
    :param data_path: root folder of the dataset
    :param name: name of the output subfolder
    :param chunk_length: number of samples per chunk file
    :return: None
    """
    train, test, evaluation = preprocess_fer_dataset(data_path)
    out_dir = os.path.join(data_path, name)
    utils.mdir(out_dir)
    generate_chunks(train, name='train', out_dir=out_dir, chunk_length=chunk_length)
    generate_chunks(test, name='test', out_dir=out_dir, chunk_length=chunk_length)
    generate_chunks(evaluation, name='eval', out_dir=out_dir, chunk_length=chunk_length)
    return None
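Example invocation; the dataset path and output folder name are illustrative:

# Writes train_*, test_* and eval_* chunk files under ./data/fer/preprocessing
generate_example_sets('./data/fer', name='preprocessing', chunk_length=1000)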
def generate_relabel_sets(data_path, name, chunk_length=1000):
    """
    Same as generate_example_sets, but for the relabeled dataset.
    """
    train, test, evaluation = preprocess_relabeled_data(data_path)
    out_dir = os.path.join(data_path, name)
    utils.mdir(out_dir)
    generate_chunks(train, name='train', out_dir=out_dir, chunk_length=chunk_length)
    generate_chunks(test, name='test', out_dir=out_dir, chunk_length=chunk_length)
    generate_chunks(evaluation, name='eval', out_dir=out_dir, chunk_length=chunk_length)
Example #5
    def __init__(self,
                 model_name,
                 data_path,
                 explain=False,
                 save_predictions=False,
                 **kwargs):
        utils.setup_gpus()

        # path of the frozen (exported) model produced by training
        self.model_path = os.path.join('./trained_models', model_name, 'frozen')

        print('Loading model from: {}'.format(self.model_path))

        self.model_name = model_name
        self.data = DataLoader(data_path, training=False).test_dataset()
        self.model = tf.keras.models.load_model(self.model_path)
        self.explain = explain
        self.outdir = os.path.join('./trained_models', model_name, 'results')
        # order is assumed to match the label indices used during training
        self.class_names = ['normal', 'pneumonia', 'COVID-19']
        self.save_predictions = save_predictions
        utils.mdir(self.outdir)
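The `DataLoader` is not shown in these snippets. As a rough sketch, reading back the TFRecords produced by the `_process_examples` sketch above might look like this; the feature spec is an assumption carried over from that sketch:

import tensorflow as tf

FEATURE_SPEC = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'channels': tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64),
}

def decode_example(serialized):
    # Parse one serialized tf.train.Example and rebuild the image tensor.
    parsed = tf.io.parse_single_example(serialized, FEATURE_SPEC)
    image = tf.io.decode_raw(parsed['image'], tf.uint8)
    shape = tf.stack([parsed['height'], parsed['width'], parsed['channels']])
    image = tf.reshape(image, shape)
    return tf.cast(image, tf.float32) / 255.0, parsed['label']

files = tf.io.gfile.glob('./preprocessing/training/test_*.tfrecord')
dataset = tf.data.TFRecordDataset(files).map(decode_example).batch(32)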
def generate_chunks(data, name, out_dir, chunk_length=1000):
    """
    Split a list of dictionaries into fixed-size chunk files. This keeps
    large-scale training portable: you stream a few files at a time instead of
    loading the whole dataset into memory at once (e.g. with keras, pandas, etc.).
    :param data: list of dictionaries
    :param name: prefix for the chunk files ('train', 'test' or 'eval')
    :param out_dir: output directory
    :param chunk_length: number of samples per chunk file
    :return: None (just to keep syntax)
    """
    # starting offset of each chunk
    parts = list(range(0, len(data), chunk_length))
    print('generating {} chunk files for the {} set'.format(len(parts), name))
    if not os.path.exists(out_dir):
        utils.mdir(out_dir)
    for i, j in enumerate(parts):
        new_data = data[j:(j + chunk_length)]
        # deterministic shuffle within each chunk
        random.seed(1)
        new_data = random.sample(new_data, len(new_data))
        fn = '{}_{:03d}-{}.data'.format(name, i + 1, len(parts))
        utils.save(os.path.join(out_dir, fn), new_data)
    print('training/evaluation batches saved in: {}'.format(out_dir))
    return None
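A minimal usage sketch; the sample contents are placeholders:

# Toy data: 2500 samples with chunk_length=1000 produce three files in ./out:
# train_001-3.data, train_002-3.data, train_003-3.data
samples = [{'image': None, 'label': i % 3} for i in range(2500)]
generate_chunks(samples, name='train', out_dir='./out', chunk_length=1000)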
    def create_dirs(self):
        log_dir = os.path.join(self.model_path, 'logs')
        ckpt_dir = os.path.join(self.model_path, 'checkpoints')
        utils.mdir(log_dir)
        utils.mdir(ckpt_dir)
        utils.mdir(os.path.join(self.model_path, 'weights'))
        # separate summary writers so train and test curves appear side by side in TensorBoard
        train_writer = tf.summary.create_file_writer(os.path.join(log_dir, 'opt/train'))
        test_writer = tf.summary.create_file_writer(os.path.join(log_dir, 'opt/test'))
        return log_dir, ckpt_dir, train_writer, test_writer
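For reference, the returned writers are typically used like this inside a training loop (the scalar values and step are placeholders):

with train_writer.as_default():
    tf.summary.scalar('loss', 0.42, step=1)  # placeholder value
with test_writer.as_default():
    tf.summary.scalar('loss', 0.48, step=1)  # placeholder value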
Example #8
    def __init__(self, data_path):
        self.data_path = data_path
        # all intermediate artifacts are written under <data_path>/preprocessing
        utils.mdir(os.path.join(data_path, 'preprocessing'))