Example #1
    def _generator(self):
        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            remaining = self.n_examples
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size
                (data, ts, labels,
                 header) = read_chunk(self.reader, current_size)
                data = preprocess_chunk(data, ts, self.discretizer,
                                        self.normalizer)
                data = (data, labels)
                data = common_utils.sort_and_shuffle(data, B)

                for i in range(0, current_size, B):
                    X = nn_utils.pad_zeros(data[0][i:i + B])
                    y = data[1][i:i + B]
                    y_true = np.array(y)

                    if self.partition == 'log':
                        y = [metrics.get_bin_log(x, 10) for x in y]
                    if self.partition == 'custom':
                        y = [metrics.get_bin_custom(x, 10) for x in y]

                    y = np.array(y)

                    if self.return_y_true:
                        yield (X, y, y_true)
                    else:
                        yield (X, y)
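Nearly every example on this page pads a slice of variable-length sequences with a `pad_zeros`-style helper (from `nn_utils` or `common_utils`) before yielding it. The helper itself is not shown here; the sketch below is a minimal stand-in for the idea, with the `min_length` keyword mirroring later examples (its exact semantics in the repository are an assumption).

import numpy as np

def pad_zeros_sketch(arrays, min_length=None):
    """Zero-pad a list of (T_i, D) arrays into one dense (B, T_max, D) batch.
    Illustrative stand-in only, not the repository's pad_zeros."""
    dtype = arrays[0].dtype
    max_len = max(x.shape[0] for x in arrays)
    if min_length is not None:
        max_len = max(max_len, min_length)
    padded = [np.concatenate([x, np.zeros((max_len - x.shape[0],) + x.shape[1:],
                                          dtype=dtype)], axis=0)
              for x in arrays]
    return np.stack(padded, axis=0)

# e.g. pad_zeros_sketch([np.ones((3, 5)), np.ones((2, 5))]).shape == (2, 3, 5)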
Example #2
    def _generator(self):
        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            remaining = self.n_examples
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size

                ret = common_utils.read_chunk(self.reader, current_size)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                Xs, Ts = preprocess_chunk_time(Xs,
                                               ts,
                                               self.discretizer,
                                               self.normalizer,
                                               max_seq_len=1200,
                                               mask_value=0.)
                (Xs, Ts, ys, ts, names) = common_utils.sort_and_shuffle(
                    [Xs, Ts, ys, ts, names], B)

                for i in range(0, current_size, B):
                    X = common_utils.pad_zeros(Xs[i:i + B])
                    T = common_utils.pad_zeros(Ts[i:i + B])
                    y = ys[i:i + B]
                    y_true = np.array(y)
                    batch_names = names[i:i + B]
                    batch_ts = ts[i:i + B]

                    if self.partition == 'log':
                        y = [metrics.get_bin_log(x, 10) for x in y]
                    if self.partition == 'custom':
                        y = [metrics.get_bin_custom(x, 10) for x in y]

                    y = np.array(y)

                    if self.use_time:
                        if self.return_y_true:
                            batch_data = ([X, T], y, y_true)
                        else:
                            batch_data = ([X, T], y)
                    else:
                        if self.return_y_true:
                            batch_data = (X, y, y_true)
                        else:
                            batch_data = (X, y)

                    if not self.return_names:
                        yield batch_data
                    else:
                        yield {
                            "data": batch_data,
                            "names": batch_names,
                            "ts": batch_ts
                        }
Example #3
    def _generator(self):
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                order = list(range(N))
                random.shuffle(order)
                tmp_data = [[[None]*N, [None]*N], [None]*N]
                tmp_names = [None] * N
                tmp_ts = [None] * N
                for i in range(N):
                    tmp_data[0][0][i] = self.data[0][0][order[i]]
                    tmp_data[0][1][i] = self.data[0][1][order[i]]
                    tmp_data[1][i] = self.data[1][order[i]]
                    tmp_names[i] = self.names[order[i]]
                    tmp_ts[i] = self.ts[order[i]]
                self.data = tmp_data
                self.names = tmp_names
                self.ts = tmp_ts
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle([Xs, masks, ys,
                                                                                      self.names, self.ts], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i+B]
                mask = self.data[0][1][i:i+B]
                y = self.data[1][i:i+B]
                names = self.names[i:i+B]
                ts = self.ts[i:i+B]

                y_true = [np.array(x) for x in y]
                y_true = common_utils.pad_zeros(y_true)
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    y = [np.array([metrics.get_bin_log(x, 10) for x in z]) for z in y]
                if self.partition == 'custom':
                    y = [np.array([metrics.get_bin_custom(x, 10) for x in z]) for z in y]

                X = common_utils.pad_zeros(X)  # (B, T, D)
                mask = common_utils.pad_zeros(mask)  # (B, T)
                y = common_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)

                if self.return_y_true:
                    batch_data = ([X, mask], y, y_true)
                else:
                    batch_data = ([X, mask], y)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {"data": batch_data, "names": names, "ts": ts}
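`common_utils.sort_and_shuffle` appears in almost every generator here. Its role is to reorder several parallel lists jointly: sort by sequence length so each batch of size `B` needs as little padding as possible, then shuffle whole batches so the epoch order still varies. Below is a minimal sketch of that idea, assuming the first list carries the sequences whose lengths drive the sort; the benchmark's real helper may differ in detail.

import random

def sort_and_shuffle_sketch(lists, batch_size):
    """Jointly reorder parallel lists: sort by length of the first list's
    items, chunk into batches of `batch_size`, shuffle the batches.
    Illustrative only."""
    n = len(lists[0])
    order = sorted(range(n), key=lambda i: len(lists[0][i]))
    batches = [order[i:i + batch_size] for i in range(0, n, batch_size)]
    random.shuffle(batches)
    flat = [idx for batch in batches for idx in batch]
    return [[lst[idx] for idx in flat] for lst in lists]

# e.g. Xs, ys = sort_and_shuffle_sketch([Xs, ys], batch_size=8)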
Example #4
    def _generator(self):
        print(f"examples: {self.n_examples}  steps: {self.steps}")

        B = self.batch_size
        while True:
            if self.shuffle:
                self.reader.random_shuffle()
            remaining = int(self.n_examples * 1.15)
            while remaining > 0:
                current_size = min(self.chunk_size, remaining)
                remaining -= current_size
                print(f"Reading chunk size: {current_size} with {remaining} remaining")

                ret = common_utils.read_chunk(self.reader, current_size)
                Xs = ret["X"]
                ts = ret["t"]
                ys = ret["y"]
                names = ret["name"]

                print(f"len(Xs): {len(Xs)}")

                Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
                (Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)

                for i in range(0, current_size, B):

                    X = common_utils.pad_zeros(Xs[i:i + B])
                    y = ys[i:i+B]
                    y_true = np.array(y)
                    batch_names = names[i:i+B]
                    batch_ts = ts[i:i+B]

                    if self.partition == 'log':
                        y = [metrics.get_bin_log(x, 10) for x in y]
                    if self.partition == 'custom':
                        y = [metrics.get_bin_custom(x, 10) for x in y]

                    y = np.array(y)

                    #aflanders: debug-Convert to tensors
                    # X = tf.convert_to_tensor(X)
                    # y = tf.convert_to_tensor(y)
                    # y_true = tf.convert_to_tensor(y_true)
                    #aflanders: debug-Convert to tensors

                    if self.return_y_true:
                        batch_data = (X, y, y_true)
                    else:
                        batch_data = (X, y)

                    if not self.return_names:
                        yield batch_data
                    else:
                        yield {"data": batch_data, "names": batch_names, "ts": batch_ts}
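Note that this variant reads `int(self.n_examples * 1.15)` examples per outer loop, apparently overshooting the epoch size so the generator never runs dry while Keras is still requesting `self.steps` batches. The usual relationship between examples, batch size, and steps, as a quick self-contained check (the numbers are dummies):

import math

n_examples, batch_size = 17903, 8           # dummy values
steps = math.ceil(n_examples / batch_size)  # batches requested per epoch
assert steps * batch_size >= n_examples     # the last batch may be partial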
Example #5
    def process_input(self, data_raw):
        Xs = nn_utils.pad_zeros(data_raw[0]).astype(np.float32)
        lens = [len(x) for x in data_raw[0]]  # per-example lengths before padding
        ys = np.array(data_raw[1]).astype(np.float32)

        bin_ids = [metrics.get_bin_custom(x, self.nbins) for x in ys]

        for x in bin_ids:
            assert x >= 0 and x < self.nbins

        return (Xs, lens, np.array(bin_ids, dtype=np.int32), ys)
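`metrics.get_bin_log` and `metrics.get_bin_custom` map a continuous length-of-stay value to one of 10 class indices, which is what the `partition == 'log'` / `'custom'` branches above rely on. The benchmark defines its own bin edges in the `metrics` module; the sketch below only illustrates the two binning styles, with placeholder edges that are an assumption, not the benchmark's values.

import numpy as np

# Placeholder edges for illustration only; the benchmark's metrics module
# defines the real ones.
_CUSTOM_EDGES = [1, 2, 3, 4, 5, 6, 7, 8, 14]

def get_bin_custom_sketch(x, nbins=10):
    """Bucket x with hand-picked edges into index 0..nbins-1."""
    return int(np.searchsorted(_CUSTOM_EDGES[:nbins - 1], x, side='right'))

def get_bin_log_sketch(x, nbins=10):
    """Bucket x on a log scale so longer stays share wider bins."""
    b = int(np.floor(np.log1p(max(x, 0.0))))
    return int(min(b, nbins - 1))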
Example #6
    def _generator(self):
        B = self.batch_size
        while True:
            if self.shuffle:
                N = len(self.data[1])
                order = list(range(N))  # list() so random.shuffle can modify it in place
                random.shuffle(order)
                tmp = [[[None] * N, [None] * N], [None] * N]
                for i in range(N):
                    tmp[0][0][i] = self.data[0][0][order[i]]
                    tmp[0][1][i] = self.data[0][1][order[i]]
                    tmp[1][i] = self.data[1][order[i]]
                self.data = tmp
            else:
                # sort entirely
                Xs = self.data[0][0]
                masks = self.data[0][1]
                ys = self.data[1]
                (Xs, masks,
                 ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
                self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]

                y_true = [np.array(x) for x in y]
                y_true = nn_utils.pad_zeros(y_true)
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in y
                    ]
                if self.partition == 'custom':
                    y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in y
                    ]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)

                if self.return_y_true:
                    yield ([X, mask], y, y_true)
                else:
                    yield ([X, mask], y)
Example #7
def read_and_extract_features(reader, count):
    read_chunk_size = 1000
    #assert (count % read_chunk_size == 0)
    Xs = []
    ys = []
    for i in range(count // read_chunk_size):
        (chunk, ts, y, header) = utils.read_chunk(reader, read_chunk_size)
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period,
                                                       args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
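`one_hot` here (and in the next example) is a small helper from the surrounding script and is not shown on this page. A minimal stand-in, assuming it turns a bin index into a length-`nbins` indicator vector:

import numpy as np

def one_hot(index, nbins=10):
    """Indicator vector of length nbins with a 1 at `index`.
    Minimal stand-in for the helper used above; the real one may differ."""
    ret = np.zeros((nbins,), dtype=np.float32)
    ret[index] = 1.0
    return ret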
Example #8
def read_and_extract_features(reader, count, period, features):
    read_chunk_size = 1000
    Xs = []
    ys = []
    names = []
    ts = []
    for i in range(0, count, read_chunk_size):
        j = min(count, i + read_chunk_size)
        ret = common_utils.read_chunk(reader, j - i)
        X = common_utils.extract_features_from_rawdata(ret['X'], ret['header'], period, features)
        Xs.append(X)
        ys += ret['y']
        names += ret['name']
        ts += ret['t']
    Xs = np.concatenate(Xs, axis=0)
    bins = np.array([one_hot(metrics.get_bin_custom(x, n_bins)) for x in ys])
    return (Xs, bins, ys, names, ts)
Example #9
    def getitem(self, index, return_y_true=False):
        print(f"Start: {index} from reader:{self.reader.listfile}")

        B = self.batch_size
        ret = common_utils.read_chunk_index(self.reader, index*B, B)
        Xs = ret["X"]
        ts = ret["t"]
        ys = ret["y"]
        names = ret["name"]

        Xs = preprocess_chunk(Xs, ts, self.discretizer, self.normalizer)
        #(Xs, ys, ts, names) = common_utils.sort_and_shuffle([Xs, ys, ts, names], B)

        i=0
        X = common_utils.pad_zeros(Xs[i:i + B])
        y = ys[i:i+B]
        y_true = np.array(y)
        batch_names = names[i:i+B]
        batch_ts = ts[i:i+B]

        if self.partition == 'log':
            y = [metrics.get_bin_log(x, 10) for x in y]
        if self.partition == 'custom':
            y = [metrics.get_bin_custom(x, 10) for x in y]

        y = np.array(y)

        #aflanders: debug-Convert to tensors
        # X = tf.convert_to_tensor(X)
        # y = tf.convert_to_tensor(y)
        # y_true = tf.convert_to_tensor(y_true)
        #aflanders: debug-Convert to tensors

        if return_y_true:
            batch_data = (X, y, y_true)
        else:
            batch_data = (X, y)

        print(f"End: {index} from reader:{self.reader.listfile}")

        if not self.return_names:
            return batch_data
        else:
            return {"data": batch_data, "names": batch_names, "ts": batch_ts}
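Unlike the generators above, this `getitem` fetches exactly one batch by index, which matches the contract of `tf.keras.utils.Sequence.__getitem__`. A hedged sketch of how such a reader-backed object could be exposed as a `Sequence` (the class name, constructor arguments, and the `batch_loader` attribute are assumptions for illustration):

import math
import tensorflow as tf

class LOSBatchSequence(tf.keras.utils.Sequence):
    """Hypothetical wrapper exposing an index-addressable batch loader
    (an object with a getitem(index) method like the one above) to Keras."""

    def __init__(self, batch_loader, n_examples, batch_size):
        self.batch_loader = batch_loader
        self.n_examples = n_examples
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch.
        return math.ceil(self.n_examples / self.batch_size)

    def __getitem__(self, index):
        # Delegates to the reader-backed getitem above, which returns (X, y).
        return self.batch_loader.getitem(index)

# e.g. model.fit(LOSBatchSequence(loader, n_examples, batch_size), epochs=5)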
Example #10
    def _generator(self):
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            Xs = self.data[0][0]
            masks = self.data[0][1]
            ys = self.data[1]
            (Xs, masks, ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
            self.data = [[Xs, masks], ys]

            for i in range(0, len(self.data[1]), B):
                X = self.data[0][0][i:i + B]
                mask = self.data[0][1][i:i + B]
                y = self.data[1][i:i + B]

                y_true = [np.array(x) for x in y]
                y_true = nn_utils.pad_zeros(y_true)
                y_true = np.expand_dims(y_true, axis=-1)

                if self.partition == 'log':
                    y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in y
                    ]
                if self.partition == 'custom':
                    y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in y
                    ]

                X = nn_utils.pad_zeros(X)  # (B, T, D)
                mask = nn_utils.pad_zeros(mask)  # (B, T)
                y = nn_utils.pad_zeros(y)
                y = np.expand_dims(y, axis=-1)

                if self.return_y_true:
                    yield ([X, mask], y, y_true)
                else:
                    yield ([X, mask], y)
Example #11
    def _generator(self):
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            kvpairs = list(self.data.items())  # list() so the items view can be indexed below
            mas = [kv[1] for kv in kvpairs]
            mas = common_utils.sort_and_shuffle(mas, B)
            for i in range(len(kvpairs)):
                self.data[kvpairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                ## ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                ## decomp
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M,
                                              min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y,
                                              min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                ## los
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                los_y_true = nn_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                ## pheno
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                        T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    yield (inputs, outputs, los_y_true)
                else:
                    yield (inputs, outputs)
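The `target_repl` branches replicate the per-stay in-hospital-mortality and phenotype targets across all T time steps so that a per-timestep auxiliary loss can be attached. A small self-contained shape check of exactly the `expand_dims` / `repeat` calls used above (the values are dummy data):

import numpy as np

B, T = 4, 7
ihm_y = np.expand_dims(np.array([0, 1, 1, 0]), axis=-1)        # (B, 1)
ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)     # (B, T, 1)

pheno_y = np.zeros((B, 25))                                    # (B, 25)
pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)

assert ihm_seq.shape == (B, T, 1) and pheno_seq.shape == (B, T, 25)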
Example #12
    def _generator(self):
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            kvpairs = list(self.data.items())
            mas = [kv[1] for kv in kvpairs]

            if self.shuffle:
                N = len(self.data['X'])
                order = list(range(N))
                random.shuffle(order)
                tmp = [None] * len(mas)
                for mas_idx in range(len(mas)):
                    tmp[mas_idx] = [None] * len(mas[mas_idx])
                    for i in range(N):
                        tmp[mas_idx][i] = mas[mas_idx][order[i]]
                for i in range(len(kvpairs)):
                    self.data[kvpairs[i][0]] = tmp[i]
            else:
                # sort entirely
                mas = common_utils.sort_and_shuffle(mas, B)
                for i in range(len(kvpairs)):
                    self.data[kvpairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                # ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # decomp
                decomp_M = self.data['decomp_M'][i:i + B]
                decomp_M = nn_utils.pad_zeros(decomp_M,
                                              min_length=self.ihm_pos + 1)
                decomp_y = self.data['decomp_y'][i:i + B]
                decomp_y = nn_utils.pad_zeros(decomp_y,
                                              min_length=self.ihm_pos + 1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                # los
                los_M = self.data['los_M'][i:i + B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                los_y_true = nn_utils.pad_zeros(los_y,
                                                min_length=self.ihm_pos + 1)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][i:i + B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(
                        T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {
                        "data": batch_data,
                        "names": self.data["names"][i:i + B],
                        "ts": self.data["ts"][i:i + B],
                        "decomp_ts": self.data["decomp_ts"][i:i + B],
                        "los_ts": self.data["los_ts"][i:i + B]
                    }
Example #13
    def _generator(self):
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            kv_pairs = list(self.data.items())
            mas = [kv[1] for kv in kv_pairs]

            if self.shuffle:
                N = len(self.data['X'])
                order = list(range(N))
                random.shuffle(order)
                tmp = [None] * len(mas)
                for mas_idx in range(len(mas)):
                    tmp[mas_idx] = [None] * len(mas[mas_idx])
                    for i in range(N):
                        tmp[mas_idx][i] = mas[mas_idx][order[i]]
                for i in range(len(kv_pairs)):
                    self.data[kv_pairs[i][0]] = tmp[i]
            else:
                # sort entirely
                mas = common_utils.sort_and_shuffle(mas, B)
                for i in range(len(kv_pairs)):
                    self.data[kv_pairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i+B]
                X = nn_utils.pad_zeros(X, min_length=self.ihm_pos+1)
                T = X.shape[1]

                # ihm
                ihm_M = np.array(self.data['ihm_M'][i:i+B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i+B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # decomp
                decomp_M = self.data['decomp_M'][i:i+B]
                decomp_M = nn_utils.pad_zeros(decomp_M, min_length=self.ihm_pos+1)
                decomp_y = self.data['decomp_y'][i:i+B]
                decomp_y = nn_utils.pad_zeros(decomp_y, min_length=self.ihm_pos+1)
                decomp_y = np.expand_dims(decomp_y, axis=-1)  # (B, T, 1)
                outputs.append(decomp_y)

                # los
                los_M = self.data['los_M'][i:i+B]
                los_M = nn_utils.pad_zeros(los_M, min_length=self.ihm_pos+1)
                los_y = self.data['los_y'][i:i+B]
                los_y_true = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)

                if self.partition == 'log':
                    los_y = [np.array([metrics.get_bin_log(x, 10) for x in z]) for z in los_y]
                if self.partition == 'custom':
                    los_y = [np.array([metrics.get_bin_custom(x, 10) for x in z]) for z in los_y]
                los_y = nn_utils.pad_zeros(los_y, min_length=self.ihm_pos+1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                # pheno
                pheno_y = np.array(self.data['pheno_y'][i:i+B])
                outputs.append(pheno_y)
                if self.target_repl:
                    pheno_seq = np.expand_dims(pheno_y, axis=1).repeat(T, axis=1)  # (B, T, 25)
                    outputs.append(pheno_seq)

                inputs = [X, ihm_M, decomp_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {'data': batch_data,
                           'names': self.data['names'][i:i+B],
                           'decomp_ts': self.data['decomp_ts'][i:i+B],
                           'los_ts': self.data['los_ts'][i:i+B],
                           'pheno_ts': self.data['pheno_ts'][i:i + B]}
Example #14
File: utils.py  Project: sz891016/EHR
    def _generator(self):
        B = self.batch_size
        while True:
            # convert to right format for sort_and_shuffle
            kv_pairs = list(self.data.items())
            data_index = [pair[0] for pair in kv_pairs].index('X')
            if data_index > 0:
                kv_pairs[0], kv_pairs[data_index] = kv_pairs[
                    data_index], kv_pairs[0]
            mas = [kv[1] for kv in kv_pairs]

            if self.shuffle:
                N = len(self.data['X'])
                order = list(range(N))
                random.shuffle(order)
                tmp = [None] * len(mas)
                for mas_idx in range(len(mas)):
                    tmp[mas_idx] = [None] * len(mas[mas_idx])
                    for i in range(N):
                        tmp[mas_idx][i] = mas[mas_idx][order[i]]
                for i in range(len(kv_pairs)):
                    self.data[kv_pairs[i][0]] = tmp[i]
            else:
                # sort entirely
                mas = common_utils.sort_and_shuffle(mas, B)
                for i in range(len(kv_pairs)):
                    self.data[kv_pairs[i][0]] = mas[i]

            for i in range(0, len(self.data['X']), B):
                outputs = []

                # X
                X = self.data['X'][i:i + B]
                X = common_utils.pad_zeros(X, min_length=self.ihm_pos + 1)
                T = X.shape[1]

                # ihm
                ihm_M = np.array(self.data['ihm_M'][i:i + B])
                ihm_M = np.expand_dims(ihm_M, axis=-1)  # (B, 1)
                ihm_y = np.array(self.data['ihm_y'][i:i + B])
                ihm_y = np.expand_dims(ihm_y, axis=-1)  # (B, 1)
                outputs.append(ihm_y)
                if self.target_repl:
                    ihm_seq = np.expand_dims(ihm_y, axis=-1).repeat(
                        T, axis=1)  # (B, T, 1)
                    outputs.append(ihm_seq)

                # los
                los_M = self.data['los_M'][i:i + B]
                los_M = common_utils.pad_zeros(los_M,
                                               min_length=self.ihm_pos + 1)
                los_y = self.data['los_y'][i:i + B]
                los_y_true = common_utils.pad_zeros(los_y,
                                                    min_length=self.ihm_pos +
                                                    1)

                if self.partition == 'log':
                    los_y = [
                        np.array([metrics.get_bin_log(x, 10) for x in z])
                        for z in los_y
                    ]
                if self.partition == 'custom':
                    los_y = [
                        np.array([metrics.get_bin_custom(x, 10) for x in z])
                        for z in los_y
                    ]
                los_y = common_utils.pad_zeros(los_y,
                                               min_length=self.ihm_pos + 1)
                los_y = np.expand_dims(los_y, axis=-1)  # (B, T, 1)
                outputs.append(los_y)

                inputs = [X, ihm_M, los_M]

                if self.return_y_true:
                    batch_data = (inputs, outputs, los_y_true)
                else:
                    batch_data = (inputs, outputs)

                if not self.return_names:
                    yield batch_data
                else:
                    yield {
                        'data': batch_data,
                        'names': self.data['names'][i:i + B],
                        'los_ts': self.data['los_ts'][i:i + B]
                    }