Exemplo n.º 1
0
def write_shard_records(filename, excel_path, dicom_dir, split_name):
    """Write the ECG signals and labels of a dataset as sharded TFRecord files.

    The annotations are loaded through ``ECGToEchoDataset``. Every entry of
    its ``'file name'`` column is resolved against ``dicom_dir``, parsed with
    ``patient.Patient`` and its ``filtered_signals`` are serialized together
    with the matching ``'label'`` value. Each shard holds at most 300 records
    and is written under the relative directory ``data`` as
    ``<filename>_test_<shard>-of-<last shard>.record``.

    :param filename: Prefix of the shard file names.
    :param excel_path: Forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: Directory the DICOM file names are joined to.
    :param split_name: Forwarded to ``ECGToEchoDataset``.
    """
    ds_pytorch = ecg_to_echo_dataset.ECGToEchoDataset(excel_path, dicom_dir,
                                                      split_name, None)
    dicom_files = ds_pytorch.annotations_df['file name']
    labels = ds_pytorch.annotations_df['label']
    n_records = len(dicom_files)
    n_ecg_shard = 300
    # Ceiling division: a trailing partial shard is written instead of being
    # dropped, and fewer than n_ecg_shard records still produce one shard.
    n_shards = (n_records + n_ecg_shard - 1) // n_ecg_shard

    for shard in tqdm.tqdm(range(n_shards)):
        # Shard name pattern: "<dataset name>_<split>_<shard>-of-<last>".
        # NOTE(review): the split part is hard-coded to "test" even though a
        # split_name argument is available -- confirm whether it should be
        # used here before changing the file names readers depend on.
        tfrecords_shard_path = "{}_{}_{}.record".format(
            filename, "test", '%.5d-of-%.5d' % (shard, n_shards - 1))
        index = shard * n_ecg_shard
        # Clamp to the record count; a -1 sentinel here would make the slice
        # silently skip the final record.
        end = min(index + n_ecg_shard, n_records)
        ecg_shard_list = dicom_files[index:end]
        labeld_shard_list = labels[index:end]
        print(index, end)
        with tf.io.TFRecordWriter(os.path.join(
                'data', tfrecords_shard_path)) as writer:
            for dicom_file, label in zip(ecg_shard_list, labeld_shard_list):
                print(dicom_file, label)
                dicom_path = os.path.join(dicom_dir, f"{dicom_file}")
                patient_obj = patient.Patient(patient_dicom_path=dicom_path)
                example = serialize_example(patient_obj.filtered_signals,
                                            label)
                writer.write(example)
Exemplo n.º 2
0
    def __getitem__(self, idx):
        """Return the sample dictionary for the annotation row at ``idx``.

        The DICOM file named in the ``'file name'`` column is loaded from
        ``self.dicom_dir``. The label is the ``'label'`` column value, unless
        ``self.threshold_35`` is truthy, in which case the
        ``'cognos_reserved_text13'`` value is mapped to 1 when it is in
        ``metadata.HEALTHY_35`` and to 0 when it is in ``metadata.SICK_35``.

        :param idx: Row position; a tensor is converted with ``tolist()``.
        :return: Dict with keys ``'ecg_signal_filtered'`` (the patient's
            ``sub_sampled_ecg``), ``'echo'`` and ``'dicom_file'``, passed
            through ``self.transform`` when one is set.
        :raises AssertionError: If the threshold label text is in neither
            ``metadata.HEALTHY_35`` nor ``metadata.SICK_35``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.annotations_df['file name'].iloc[idx]
        patient_obj = patient.Patient(
            patient_dicom_path=os.path.join(self.dicom_dir, f"{file_name}"))

        if self.threshold_35:
            label_str = self.annotations_df['cognos_reserved_text13'].iloc[idx]
            if label_str in metadata.HEALTHY_35:
                echo_result = 1
            elif label_str in metadata.SICK_35:
                echo_result = 0
            else:
                raise AssertionError(f"{label_str}")
        else:
            echo_result = self.annotations_df['label'].iloc[idx]

        # Only the sub-sampled signal is exposed, under the
        # 'ecg_signal_filtered' key.
        sample = {
            'ecg_signal_filtered': patient_obj.sub_sampled_ecg,
            'echo': echo_result,
            'dicom_file': file_name,
        }
        if not self.transform:
            return sample
        return self.transform(sample)
def export_data_to_csv(dicom_dir):
    """Write ``data_info.csv`` describing every ``.dcm`` file in a directory.

    Each file is loaded with ``patient.Patient``; its file name, first name,
    last name, patient id and acquisition date/time are printed and collected.
    The patient's ``date`` string is split on whitespace: the last token
    becomes the ``time`` column and the remaining tokens the ``date`` column.

    :param dicom_dir: Directory scanned (non-recursively) for ``.dcm`` files.
    """
    print("Starting job..")
    column_names = [
        'file name', 'first name', 'last name', 'patient id', 'date', 'time'
    ]
    table = {name: [] for name in column_names}

    for entry in os.listdir(dicom_dir):
        if not entry.endswith('.dcm'):
            continue
        print("Processing file {}...".format(entry))
        record = patient.Patient(os.path.join(dicom_dir, entry))
        first_name = record.first_name
        last_name = record.last_name
        patient_id = record.id
        date_tokens = record.date.split()
        date_part = " ".join(date_tokens[:-1])
        time_part = date_tokens[-1]
        values = (entry, first_name, last_name, patient_id, date_part,
                  time_part)
        print(
            "file name: {}\nfirst name: {}\nlast name: {}\npatient id: {}\ndate: {}\ntime: {}\n"
            .format(*values))
        for name, value in zip(column_names, values):
            table[name].append(value)

    pd.DataFrame(table, columns=column_names).to_csv('data_info.csv',
                                                     index=False)
    print("Export completed...")
Exemplo n.º 4
0
def export_excel_with_new_dicoms(minimum_date, dicom_dir='.'):
    """Write ``new_ecgs.csv`` listing the DICOM files newer than a given date.

    Every ``.dcm`` file in ``dicom_dir`` is loaded with ``patient.Patient``
    and its ``date`` string is parsed with the format ``'%d %b %Y %H:%M'``.
    Files whose parsed date is strictly later than ``minimum_date`` are
    logged and exported with their path, date and patient id.

    :param minimum_date: Value compared against the parsed
        ``datetime.datetime`` of each ECG; only later ECGs are exported.
    :param dicom_dir: Directory scanned (non-recursively) for ``.dcm`` files.
    :return: None. The result is written to ``new_ecgs.csv`` in the current
        working directory.
    """
    dcm_files = [
        name for name in os.listdir(dicom_dir) if name.endswith('.dcm')
    ]
    logging.info("Found %d dcm files...", len(dcm_files))

    rows = []
    for name in dcm_files:
        path = os.path.join(dicom_dir, name)
        patient_obj = patient.Patient(patient_dicom_path=path)
        recorded_at = datetime.datetime.strptime(patient_obj.date,
                                                 '%d %b %Y %H:%M')
        if not recorded_at > minimum_date:
            continue
        patient_id = patient_obj.id
        logging.info("Found new ECG. DICOM: %s. ID: %s. Date: %s", name,
                     patient_id, recorded_at)
        rows.append((path, recorded_at, patient_id))

    new_ecgs = pd.DataFrame(rows,
                            columns=['DICOM_PATH', 'ECG_DATE', 'PATIENT_ID'])
    new_ecgs.to_csv("new_ecgs.csv")
Exemplo n.º 5
0
def create_tf_records(filename, excel_path, dicom_dir, split_name):
    """Serialize the dataset's ECG signals and labels into one TFRecord file.

    The annotations are loaded through ``ECGToEchoDataset``; for every
    ``'file name'`` / ``'label'`` pair the DICOM file is parsed with
    ``patient.Patient`` and its ``filtered_signals`` are serialized with
    ``serialize_example`` and written to ``filename``.

    :param filename: Path of the TFRecord file to write.
    :param excel_path: Forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: Directory the DICOM file names are joined to.
    :param split_name: Forwarded to ``ECGToEchoDataset``.
    """
    with tf.io.TFRecordWriter(filename) as record_writer:
        annotations = ecg_to_echo_dataset.ECGToEchoDataset(
            excel_path, dicom_dir, split_name, None).annotations_df
        file_names = annotations['file name']
        targets = annotations['label']

        for dicom_file, label in zip(file_names, targets):
            print(dicom_file, label)
            patient_obj = patient.Patient(
                patient_dicom_path=os.path.join(dicom_dir, f"{dicom_file}"))
            record_writer.write(
                serialize_example(patient_obj.filtered_signals, label))
    def __getitem__(self, idx):
        """Return the unfiltered ECG signals and echo label for row ``idx``.

        The DICOM file named in the ``'file name'`` column of
        ``self.ecg_echo_df`` is loaded from ``self.root_dir``.

        :param idx: Row position; a tensor is converted with ``tolist()``.
        :return: Dict with keys ``'ecg_signal'`` and ``'echo'``, passed
            through ``self.transform`` when one is set.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.ecg_echo_df['file name'].iloc[idx]
        patient_obj = patient.Patient(
            patient_dicom_path=os.path.join(self.root_dir, f"{file_name}"))
        label = self.ecg_echo_df['label'].iloc[idx]

        sample = {'ecg_signal': patient_obj.unfiltered_signals, 'echo': label}
        return self.transform(sample) if self.transform else sample
def create_tf_dataset(excel_path, dicom_dir, split_name):
    """Build a ``tf.data.Dataset`` of (filtered signals, label) pairs.

    All DICOM files listed in the ``'file name'`` column of the
    ``ECGToEchoDataset`` annotations are parsed eagerly with
    ``patient.Patient`` before the dataset is created with
    ``from_tensor_slices``.

    :param excel_path: Forwarded to ``ECGToEchoDataset``.
    :param dicom_dir: Directory the DICOM file names are joined to.
    :param split_name: Forwarded to ``ECGToEchoDataset``.
    :return: The dataset created from the signals and the ``'label'`` column.
    """
    def _parse_dicom(dicom_file, label):
        # NOTE(review): this helper is defined but never called in this
        # function; it wraps parse_dicom in tf.numpy_function.
        parsed = tf.numpy_function(parse_dicom, inp=[dicom_file, dicom_dir], Tout=tf.float32)
        return parsed, label

    annotations = ecg_to_echo_dataset.ECGToEchoDataset(
        excel_path, dicom_dir, split_name, None).annotations_df
    file_names = annotations['file name']
    targets = annotations['label']
    signals = [
        patient.Patient(
            patient_dicom_path=os.path.join(dicom_dir, f"{name}")
        ).filtered_signals
        for name in file_names
    ]
    return tf.data.Dataset.from_tensor_slices((signals, targets))
Exemplo n.º 8
0
    def __getitem__(self, idx):
        """Return the test-set sample dictionary for the row at ``idx``.

        The DICOM file named in the ``'file name'`` column of
        ``self.test_set_df`` is loaded from ``metadata.DICOM_DIR``. The label
        is the ``'label'`` column value, unless ``self.threshold_35`` is
        truthy, in which case the ``'cognos_reserved_text13'`` value is mapped
        to 1 when it is in ``metadata.HEALTHY_35`` and to 0 when it is in
        ``metadata.SICK_35``.

        :param idx: Row position; a tensor is converted with ``tolist()``.
        :return: Dict with the sub-sampled ECG (key ``'ecg_signal_filtered'``),
            the label (``'echo'``), the ECG number, the DICOM file name and
            the patient's age, number of samples, sampling rate, gender and
            duration; passed through ``self.transform`` when one is set.
        :raises AssertionError: If the threshold label text is in neither
            ``metadata.HEALTHY_35`` nor ``metadata.SICK_35``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        file_name = self.test_set_df['file name'].iloc[idx]
        patient_obj = patient.Patient(
            patient_dicom_path=os.path.join(metadata.DICOM_DIR,
                                            f"{file_name}"))

        # Read the per-recording attributes before resolving the label.
        ecg_number = self.test_set_df['ecg_number'].iloc[idx]
        age = patient_obj.age
        number_of_samples = patient_obj.number_of_samples
        sampling_rate = patient_obj.sampling_frequency
        gender = patient_obj.gender
        duration = patient_obj.decoded_dcom_obj.duration

        if self.threshold_35:
            label_str = self.test_set_df['cognos_reserved_text13'].iloc[idx]
            if label_str in metadata.HEALTHY_35:
                echo_result = 1
            elif label_str in metadata.SICK_35:
                echo_result = 0
            else:
                raise AssertionError(f"{label_str}")
        else:
            echo_result = self.test_set_df['label'].iloc[idx]

        # Only the sub-sampled signal is exposed, under the
        # 'ecg_signal_filtered' key.
        sample = dict(
            ecg_signal_filtered=patient_obj.sub_sampled_ecg,
            echo=echo_result,
            ecg_number=ecg_number,
            dicom_file=file_name,
            age=age,
            number_of_samples=number_of_samples,
            sampling_rate=sampling_rate,
            gender=gender,
            duration=duration,
        )
        if not self.transform:
            return sample
        return self.transform(sample)
def parse_dicom(dicom_file, dicom_dir):
    """Load a DICOM file and return the patient's filtered signals.

    ``tf.numpy_function`` hands string tensors to the wrapped Python function
    as ``bytes``. Formatting ``bytes`` into the path would produce a literal
    ``"b'...'"`` name, and a ``bytes`` directory cannot be joined with a
    ``str`` file name, so ``bytes`` arguments are decoded first. ``str``
    arguments are used unchanged.

    :param dicom_file: DICOM file name, as ``str`` or ``bytes``.
    :param dicom_dir: Directory containing the file, as ``str`` or ``bytes``.
    :return: ``filtered_signals`` of the loaded ``patient.Patient``.
    """
    if isinstance(dicom_file, bytes):
        dicom_file = os.fsdecode(dicom_file)
    if isinstance(dicom_dir, bytes):
        dicom_dir = os.fsdecode(dicom_dir)
    dicom_path = os.path.join(dicom_dir, f"{dicom_file}")
    patient_obj = patient.Patient(patient_dicom_path=dicom_path)
    return patient_obj.filtered_signals
Exemplo n.º 10
0
#
# print(e.channels_no)
# print(e.sampling_frequency)
# print(e.samples)
# print(e.duration)
#
# interp = e.interpretation()
# print(interp)
# inf = e.print_info(interp)
# print(inf)

# decoded_dcom = decode_dcom_files.DecodeDCOM(dicom_file_path)
# unfiltered_signals = decoded_dcom.unfiltered_signlas
# filtererd_signals = decoded_dcom.filtered_signals
# plt.figure()
# plt.plot(unfiltered_signals[0][500:6000])
# plt.title("Unfiltered signals")
# plt.figure()
# plt.plot(filtererd_signals[0][500:6000])
# plt.title("Filtered signals")
# plt.show()
# print(decoded_dcom.channel_definitions)

# Load the patient record from the DICOM file and print its information.
# dicom_file_path is not defined in this snippet; it must be set earlier in
# the script.
p = patient.Patient(dicom_file_path)
p.print_info()
sigs = p.get_signals()

print("Signals shape: ", sigs.shape)
# Plot samples 100-799 of the first row of the signal array
# (presumably the first ECG lead -- TODO confirm).
plt.plot(sigs[0][100:800])
plt.show()
Exemplo n.º 11
0
def parse_sampling_rate(row, dicom_dir):
    """Return the sampling frequency of the DICOM file named by ``row``.

    :param row: Mapping whose ``'file name'`` entry names the DICOM file.
    :param dicom_dir: Directory the file name is joined to.
    :return: ``sampling_frequency`` of the loaded ``patient.Patient``.
    """
    file_name = row['file name']
    return patient.Patient(
        patient_dicom_path=os.path.join(dicom_dir, f"{file_name}")
    ).sampling_frequency
Exemplo n.º 12
0
def parse_num_samples(row, dicom_dir):
    """Return the size of axis 1 of the filtered signals for ``row``'s file.

    :param row: Mapping whose ``'file name'`` entry names the DICOM file.
    :param dicom_dir: Directory the file name is joined to.
    :return: ``filtered_signals.shape[1]`` of the loaded ``patient.Patient``.
    """
    file_name = row['file name']
    signals = patient.Patient(
        patient_dicom_path=os.path.join(dicom_dir, f"{file_name}")
    ).filtered_signals
    return signals.shape[1]