示例#1
0
    def _generator(self):
        """Yield batches indefinitely, reading the dataset one chunk at a time.

        On every pass the reader is reshuffled when ``self.shuffle`` is set,
        then ``self.n_examples`` examples are read in chunks of at most
        ``self.chunk_size``. Each chunk is run through ``preprocess_chunk``
        and ``common_utils.sort_and_shuffle`` before being cut into slices of
        ``self.batch_size`` examples.

        Yields:
            ``(X, y)`` when ``self.return_names`` is false, otherwise a dict
            with the keys ``"data"``, ``"names"`` and ``"ts"``.
        """
        step = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()

            left = self.n_examples
            while left > 0:
                chunk_len = min(self.chunk_size, left)
                left -= chunk_len

                chunk = common_utils.read_chunk(self.reader, chunk_len)
                features = chunk["X"]
                timestamps = chunk["t"]
                targets = chunk["y"]
                chunk_names = chunk["name"]

                features = preprocess_chunk(features, timestamps,
                                            self.discretizer, self.normalizer)
                (features, targets, timestamps, chunk_names) = common_utils.sort_and_shuffle(
                    [features, targets, timestamps, chunk_names], step)

                for lo in range(0, chunk_len, step):
                    hi = lo + step
                    # Pad the variable-length inputs of this slice into one array.
                    padded = nn_utils.pad_zeros(features[lo:hi])
                    labels = np.array(targets[lo:hi])
                    slice_names = chunk_names[lo:hi]
                    slice_ts = timestamps[lo:hi]
                    batch_data = (padded, labels)
                    if self.return_names:
                        yield {"data": batch_data, "names": slice_names, "ts": slice_ts}
                    else:
                        yield batch_data
示例#2
0
    def _generator(self):
        """Yield ``(x, y)`` batches forever, reordering the data on each pass.

        When ``self.shuffle`` is set, inputs, targets, names and timestamps
        are permuted together using one random order. Otherwise all four are
        handed to ``common_utils.sort_and_shuffle``. The targets are then
        converted to a NumPy array and the data is cut into slices of
        ``self.batch_size`` examples.

        Yields:
            ``(x, y)``, or ``(x, [y, y_rep])`` when ``self.target_repl`` is
            set (``y_rep`` repeats ``y`` along a new axis 1, once per padded
            time step of ``x``). When ``self.return_names`` is set the batch
            is wrapped in a dict with the keys ``"data"``, ``"names"`` and
            ``"ts"``.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # random.shuffle needs a mutable sequence: on Python 3 a bare
                # range object raises TypeError, so materialise it first.
                order = list(range(N))
                random.shuffle(order)
                # Build every permuted list before rebinding any attribute so
                # that a failure leaves the generator state untouched.
                shuffled_x = [self.data[0][j] for j in order]
                shuffled_y = [self.data[1][j] for j in order]
                shuffled_names = [self.names[j] for j in order]
                shuffled_ts = [self.ts[j] for j in order]
                self.data = [shuffled_x, shuffled_y]
                self.names = shuffled_names
                self.ts = shuffled_ts
            else:
                # sort entirely
                X = self.data[0]
                y = self.data[1]
                (X, y, self.names, self.ts) = common_utils.sort_and_shuffle([X, y, self.names, self.ts], B)
                self.data = [X, y]

            self.data[1] = np.array(self.data[1])  # this is important for Keras
            for i in range(0, len(self.data[0]), B):
                x = self.data[0][i:i + B]
                y = self.data[1][i:i + B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                x = nn_utils.pad_zeros(x)
                # Copy the slice so the yielded targets do not alias self.data.
                y = np.array(y)  # (B, 25) per the original author's note

                if self.target_repl:
                    # Repeat the targets once per padded time step of x.
                    y_rep = np.expand_dims(y, axis=1).repeat(x.shape[1], axis=1)  # (B, T, 25)
                    batch_data = (x, [y, y_rep])
                else:
                    batch_data = (x, y)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
示例#3
0
    def _generator(self):
        """Yield ``([X, mask], y)`` batches forever, reordering on each pass.

        ``self.data`` is laid out as ``[[Xs, masks], ys]``. When
        ``self.shuffle`` is set, all three lists plus ``self.names`` and
        ``self.ts`` are permuted together using one random order. Otherwise
        they are handed to ``common_utils.sort_and_shuffle``. The data is then
        cut into slices of ``self.batch_size`` examples and padded with
        ``nn_utils.pad_zeros``.

        Yields:
            ``([X, mask], y)`` where ``y`` has a trailing axis of size 1
            added, or, when ``self.return_names`` is set, a dict with the
            keys ``"data"``, ``"names"`` and ``"ts"``.
        """
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                # random.shuffle needs a mutable sequence: on Python 3 a bare
                # range object raises TypeError, so materialise it first.
                order = list(range(N))
                random.shuffle(order)
                # Build every permuted list before rebinding any attribute so
                # that a failure leaves the generator state untouched.
                shuffled_Xs = [self.data[0][0][j] for j in order]
                shuffled_masks = [self.data[0][1][j] for j in order]
                shuffled_ys = [self.data[1][j] for j in order]
                shuffled_names = [self.names[j] for j in order]
                shuffled_ts = [self.ts[j] for j in order]
                self.data = [[shuffled_Xs, shuffled_masks], shuffled_ys]
                self.names = shuffled_names
                self.ts = shuffled_ts
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle(
                    [Xs, masks, ys, self.names, self.ts], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]
                names = self.names[i:i + B]
                ts = self.ts[i:i + B]

                # Shape notes below are the original author's.
                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)  # (B, T, 1)
                batch_data = ([X, mask], y)
                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
示例#4
0
    def _generator(self):
        """Yield multitask ``(inputs, outputs)`` batches forever.

        On each pass every list stored in the ``self.data`` dict is reordered
        with ``common_utils.sort_and_shuffle`` and written back under its key.
        The data is then cut into slices of ``self.batch_size`` examples.

        Yields:
            ``(inputs, outputs)`` where ``inputs`` is
            ``[X, ihm_M, decomp_M, los_M]`` and ``outputs`` holds, in order,
            the ihm, (optional replicated ihm), decomp, los, pheno and
            (optional replicated pheno) targets. The replicated entries are
            present only when ``self.target_repl`` is set. When
            ``self.return_y_true`` is set the un-binned, padded los targets
            are appended as a third tuple element.
        """
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            # dict.items() is a non-indexable view on Python 3; copy it to a
            # list so the pairs can be reordered and indexed.
            kvpairs = list(self.data.items())
            # Stable sort that only moves the 'X' entry to the front.
            # NOTE(review): sort_and_shuffle presumably orders examples by the
            # first list it receives, so it should be 'X' — confirm against
            # its implementation. The order of the other lists is irrelevant
            # because results are mapped back to their keys by position.
            kvpairs.sort(key=lambda kv: kv[0] != 'X')
            mas = [kv[1] for kv in kvpairs]
            mas = common_utils.sort_and_shuffle(mas, B)
            for idx, (key, _) in enumerate(kvpairs):
                self.data[key] = mas[idx]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                ## ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    # Repeat the ihm target once per padded time step of X.
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                ## decomp
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M,
                                              min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y,
                                              min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                ## los
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                # Keep the raw (un-binned) targets for optional return.
                los_y_true = nn_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)

                # Replace each raw value by its bin index for the selected
                # partition scheme; any other partition value leaves los_y
                # unchanged.
                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                ## pheno
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                        T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    yield (inputs, outputs, los_y_true)
                else:
                    yield (inputs, outputs)
示例#5
0
    def _generator(self):
        """Yield ``([X, mask], y)`` batches forever, with optional raw targets.

        ``self.data`` is laid out as ``[[Xs, masks], ys]``. Each pass either
        applies one random permutation to the data, names and timestamps
        (``self.shuffle``) or hands them to ``common_utils.sort_and_shuffle``.
        Targets are replaced by bin indices when ``self.partition`` is
        ``'log'`` or ``'custom'``.

        Yields:
            ``([X, mask], y)``, or ``([X, mask], y, y_true)`` when
            ``self.return_y_true`` is set. When ``self.return_names`` is set
            the batch is wrapped in a dict with the keys ``"data"``,
            ``"names"`` and ``"ts"``.
        """
        step = self.batch_size
        while True:
            if not self.shuffle:
                # sort entirely
                feats = self.data[0][0]
                flags = self.data[0][1]
                labels = self.data[1]
                (feats, flags, labels, self.names,
                 self.ts) = common_utils.sort_and_shuffle(
                     [feats, flags, labels, self.names, self.ts], step)
                self.data = [[feats, flags], labels]
            else:
                count = len(self.data[1])
                perm = list(range(count))
                random.shuffle(perm)
                new_feats = [None] * count
                new_flags = [None] * count
                new_labels = [None] * count
                new_names = [None] * count
                new_ts = [None] * count
                # Apply the same permutation to every parallel list.
                for dst, src in enumerate(perm):
                    new_feats[dst] = self.data[0][0][src]
                    new_flags[dst] = self.data[0][1][src]
                    new_labels[dst] = self.data[1][src]
                    new_names[dst] = self.names[src]
                    new_ts[dst] = self.ts[src]
                self.data = [[new_feats, new_flags], new_labels]
                self.names = new_names
                self.ts = new_ts

            for lo in range(0, len(self.data[1]), step):
                hi = lo + step
                X = self.data[0][0][lo:hi]
                mask = self.data[0][1][lo:hi]
                y = self.data[1][lo:hi]
                names = self.names[lo:hi]
                ts = self.ts[lo:hi]

                # Raw targets, padded and given a trailing axis.
                y_true = common_utils.pad_zeros([np.array(seq) for seq in y])
                y_true = np.expand_dims(y_true, axis=-1)

                # Pick the binning function matching the partition scheme;
                # any other value leaves the targets untouched.
                to_bin = None
                if self.partition == 'log':
                    to_bin = metrics.get_bin_log
                if self.partition == 'custom':
                    to_bin = metrics.get_bin_custom
                if to_bin is not None:
                    y = [np.array([to_bin(value, 10) for value in seq])
                         for seq in y]

                X = common_utils.pad_zeros(X)  # (B, T, D)
                mask = common_utils.pad_zeros(mask)  # (B, T)
                y = np.expand_dims(common_utils.pad_zeros(y), axis=-1)

                batch_data = ([X, mask], y, y_true) if self.return_y_true else ([X, mask], y)

                if self.return_names:
                    yield {"data": batch_data, "names": names, "ts": ts}
                else:
                    yield batch_data
示例#6
0
    def _generator(self):
        """Yield multitask ``(inputs, outputs)`` batches forever.

        On each pass every list stored in the ``self.data`` dict is reordered
        together: with one random permutation when ``self.shuffle`` is set,
        otherwise via ``common_utils.sort_and_shuffle``. The data is then cut
        into slices of ``self.batch_size`` examples.

        Yields:
            ``(inputs, outputs)`` where ``inputs`` is
            ``[X, ihm_M, decomp_M, los_M]`` and ``outputs`` holds, in order,
            the ihm, (optional replicated ihm), decomp, los, pheno and
            (optional replicated pheno) targets. The replicated entries are
            present only when ``self.target_repl`` is set. When
            ``self.return_y_true`` is set the un-binned, padded los targets
            are appended as a third element. When ``self.return_names`` is set
            the batch is wrapped in a dict with the keys ``'data'``,
            ``'names'``, ``'decomp_ts'``, ``'los_ts'`` and ``'pheno_ts'``.
        """
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            # dict.items() is a non-indexable view on Python 3; copy it to a
            # list so the pairs can be reordered and indexed.
            kv_pairs = list(self.data.items())
            # Stable sort that only moves the 'X' entry to the front.
            # NOTE(review): sort_and_shuffle presumably orders examples by the
            # first list it receives, so it should be 'X' — confirm against
            # its implementation. The order of the other lists is irrelevant
            # because results are mapped back to their keys by position.
            kv_pairs.sort(key=lambda kv: kv[0] != 'X')
            mas = [kv[1] for kv in kv_pairs]

            if self.shuffle:
                N = len(self.data['X'])
                # random.shuffle needs a mutable sequence: on Python 3 a bare
                # range object raises TypeError, so materialise it first.
                order = list(range(N))
                random.shuffle(order)
                # Apply the same permutation to every list in the dict.
                tmp = [[column[j] for j in order] for column in mas]
                for idx, (key, _) in enumerate(kv_pairs):
                    self.data[key] = tmp[idx]
            else:
                # sort entirely
                mas = common_utils.sort_and_shuffle(mas, B)
                for idx, (key, _) in enumerate(kv_pairs):
                    self.data[key] = mas[idx]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i+B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos+1)
                T = X.shape[1]

                # ihm
                ihm_M = np.array(self.data['ihm_M'][i:i+B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i+B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    # Repeat the ihm target once per padded time step of X.
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # decomp
                decomp_M = self.data['decomp_M'][i:i+B]
                decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos+1)
                decomp_y = self.data['decomp_y'][i:i+B]
                decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos+1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                # los
                los_M = self.data['los_M'][i:i+B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos+1)
                los_y = self.data['los_y'][i:i+B]
                # Keep the raw (un-binned) targets for optional return.
                los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)

                # Replace each raw value by its bin index for the selected
                # partition scheme; any other partition value leaves los_y
                # unchanged.
                if self.partition == 'log':
                    los_y = [np.array([metrics.get_bin_log(x, 10) for x in z]) for z in los_y]
                if self.partition == 'custom':
                    los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z]) for z in los_y]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][i:i+B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {'data': batch_data,
                           'names': self.data['names'][i:i+B],
                           'decomp_ts': self.data['decomp_ts'][i:i+B],
                           'los_ts': self.data['los_ts'][i:i+B],
                           'pheno_ts': self.data['pheno_ts'][i:i + B]}
示例#7
0
文件: utils.py 项目: sz891016/EHR
    def _generator(self):
        """Yield ``([X, ihm_M, los_M], outputs)`` batches forever.

        On each pass the lists stored in the ``self.data`` dict are reordered
        together (the ``'X'`` list is placed first before reordering): with
        one random permutation when ``self.shuffle`` is set, otherwise via
        ``common_utils.sort_and_shuffle``. The data is then cut into slices of
        ``self.batch_size`` examples.

        Yields:
            ``(inputs, outputs)`` where ``outputs`` holds the ihm target, the
            replicated ihm target (only when ``self.target_repl`` is set) and
            the los target. When ``self.return_y_true`` is set the un-binned,
            padded los targets are appended as a third element. When
            ``self.return_names`` is set the batch is wrapped in a dict with
            the keys ``'data'``, ``'names'`` and ``'los_ts'``.
        """
        step = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            entries = list(self.data.items())
            x_pos = [key for key, _ in entries].index('X')
            if x_pos > 0:
                # Swap the 'X' entry into the first position.
                entries[0], entries[x_pos] = entries[x_pos], entries[0]
            keys = [key for key, _ in entries]
            columns = [column for _, column in entries]

            if not self.shuffle:
                # sort entirely
                columns = common_utils.sort_and_shuffle(columns, step)
                for idx, key in enumerate(keys):
                    self.data[key] = columns[idx]
            else:
                count = len(self.data['X'])
                perm = list(range(count))
                random.shuffle(perm)
                reordered = []
                for column in columns:
                    slot = [None] * len(column)
                    for dst, src in enumerate(perm):
                        slot[dst] = column[src]
                    reordered.append(slot)
                for idx, key in enumerate(keys):
                    self.data[key] = reordered[idx]

            for lo in range(0, len(self.data['X']), step):
                hi = lo + step
                outputs = []

                # X
                X = common_utils.pad_zeros(self.data['X'][lo:hi],
                                           min_length=self.ihm_pos + 1)
                n_steps = X.shape[1]

                # ihm
                ihm_M = np.expand_dims(np.array(self.data['ihm_M'][lo:hi]),
                                       axis=-1)  # (B, 1)
                ihm_y = np.expand_dims(np.array(self.data['ihm_y'][lo:hi]),
                                       axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    # Repeat the ihm target once per padded time step of X.
                    expanded = np.expand_dims(ihm_y, axis=-1)
                    outputs.append(expanded.repeat(n_steps, axis=1))  # (B, T, 1)

                # los
                los_M = common_utils.pad_zeros(self.data['los_M'][lo:hi],
                                               min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][lo:hi]
                # Raw (un-binned) targets, kept for optional return.
                los_y_true = common_utils.pad_zeros(
                    los_y, min_length=self.ihm_pos + 1)

                # Pick the binning function matching the partition scheme;
                # any other value leaves the targets untouched.
                to_bin = None
                if self.partition == 'log':
                    to_bin = metrics.get_bin_log
                if self.partition == 'custom':
                    to_bin = metrics.get_bin_custom
                if to_bin is not None:
                    los_y = [np.array([to_bin(value, 10) for value in seq])
                             for seq in los_y]
                los_y = common_utils.pad_zeros(los_y,
                                               min_length=self.ihm_pos + 1)
                outputs.append(np.expand_dims(los_y, axis=-1))  # (B, T, 1)

                inputs = [X, ihm_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if self.return_names:
                    yield {
                        'data': batch_data,
                        'names': self.data['names'][lo:hi],
                        'los_ts': self.data['los_ts'][lo:hi]
                    }
                else:
                    yield batch_data
示例#8
0
    def _generator(self):
        """Yield ``([X, T, ihm_M], outputs)`` batches forever.

        On each pass the lists stored in the ``self.data`` dict are reordered
        together (the ``'X'`` list is placed first before reordering): with
        one random permutation when ``self.shuffle`` is set, otherwise via
        ``common_utils.sort_and_shuffle``. The data is then cut into slices of
        ``self.batch_size`` examples.

        Yields:
            ``(inputs, outputs)`` where ``outputs`` holds, in order, the ihm,
            (optional replicated ihm), decomp, los, pheno and (optional
            replicated pheno) targets. The replicated entries are present only
            when ``self.target_repl`` is set. When ``self.return_names`` is
            set the batch is wrapped in a dict with the keys ``'data'``,
            ``'names'``, ``'decomp_ts'``, ``'los_ts'`` and ``'pheno_ts'``.
        """
        step = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            entries = list(self.data.items())
            x_pos = [key for key, _ in entries].index('X')
            if x_pos > 0:
                # Swap the 'X' entry into the first position.
                entries[0], entries[x_pos] = entries[x_pos], entries[0]
            keys = [key for key, _ in entries]
            columns = [column for _, column in entries]

            if not self.shuffle:
                # sort entirely
                columns = common_utils.sort_and_shuffle(columns, step)
                for idx, key in enumerate(keys):
                    self.data[key] = columns[idx]
            else:
                count = len(self.data['X'])
                perm = list(range(count))
                random.shuffle(perm)
                reordered = []
                for column in columns:
                    slot = [None] * len(column)
                    for dst, src in enumerate(perm):
                        slot[dst] = column[src]
                    reordered.append(slot)
                for idx, key in enumerate(keys):
                    self.data[key] = reordered[idx]

            for lo in range(0, len(self.data['X']), step):
                hi = lo + step
                outputs = []

                # X
                X = self.data['X'][lo:hi]
                T = self.data['T'][lo:hi]
                X = common_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = common_utils.pad_zeros(T, min_length=self.ihm_pos + 1)
                n_steps = X.shape[1]

                # ihm
                ihm_M = np.expand_dims(np.array(self.data['ihm_M'][lo:hi]),
                                       axis=-1)  # (B, 1)
                ihm_y = np.expand_dims(np.array(self.data['ihm_y'][lo:hi]),
                                       axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    # Repeat the ihm target once per padded time step of X.
                    expanded = np.expand_dims(ihm_y, axis=-1)
                    outputs.append(expanded.repeat(n_steps, axis=1))  # (B, t, 1)

                # decomp
                outputs.append(
                    np.expand_dims(self.data['decomp_y'][lo:hi], axis=-1))  # (B, 1)

                # los
                outputs.append(
                    np.expand_dims(self.data['los_y'][lo:hi], axis=-1))  # (B, 1)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][lo:hi])
                outputs.append(pheno_y)
                if self.target_repl:
                    outputs.append(
                        np.expand_dims(pheno_y, axis=1).repeat(n_steps, axis=1))  # (B, t, 25)

                batch_data = ([X, T, ihm_M], outputs)

                if self.return_names:
                    yield {
                        'data': batch_data,
                        'names': self.data['names'][lo:hi],
                        'decomp_ts': self.data['decomp_ts'][lo:hi],
                        'los_ts': self.data['los_ts'][lo:hi],
                        'pheno_ts': self.data['pheno_ts'][lo:hi]
                    }
                else:
                    yield batch_data