def load_samples(filenames, feat_type, label_type, augment, qbnorm, size_limit, logfile=None):
    """Load flat candidate samples from files and concatenate them.

    Files are read in order; loading stops early once ``size_limit``
    candidate variables have been accumulated, and the concatenated
    arrays are trimmed so at most ``size_limit`` candidates are kept.

    Parameters
    ----------
    filenames : sequence of str
        Sample files to load.
    feat_type, label_type, augment, qbnorm
        Forwarded unchanged to ``load_flat_samples`` (defined elsewhere
        in this module — semantics not visible here).
    size_limit : int
        Maximum total number of candidate variables to keep.
    logfile : optional
        Log destination forwarded to ``log``.

    Returns
    -------
    x : np.ndarray
        Concatenated candidate features (first axis = candidates).
    y : np.ndarray
        Concatenated candidate labels, aligned with ``x``.
    ncands : np.ndarray
        Per-file candidate counts; the last entry is reduced when the
        final file overshoots ``size_limit``.
    """
    x, y, ncands = [], [], []
    total_ncands = 0
    for i, filename in enumerate(filenames):
        # load_flat_samples also returns a "best" entry; it is not needed here.
        cand_x, cand_y, _ = load_flat_samples(filename, feat_type, label_type, augment, qbnorm)
        x.append(cand_x)
        y.append(cand_y)
        ncands.append(cand_x.shape[0])
        total_ncands += ncands[-1]
        if (i + 1) % 100 == 0:
            log(f"  {i+1}/{len(filenames)} files processed ({total_ncands} candidate variables)", logfile)
        if total_ncands >= size_limit:
            log(f"  dataset size limit reached ({size_limit} candidate variables)", logfile)
            break
    x = np.concatenate(x)
    y = np.concatenate(y)
    ncands = np.asarray(ncands)
    if total_ncands > size_limit:
        # The last file pushed us past the limit: drop the overshoot and
        # shrink that file's count accordingly so ncands stays consistent.
        x = x[:size_limit]
        y = y[:size_limit]
        ncands[-1] -= total_ncands - size_limit
    return x, y, ncands
def load_batch_flat(sample_files, feats_type, augment_feats, normalize_feats):
    """Load a batch of flat samples and pack them into numpy arrays.

    Parameters
    ----------
    sample_files : sequence of str
        Sample files to load.
    feats_type, augment_feats, normalize_feats
        Forwarded unchanged to ``utilities.load_flat_samples`` (with
        label type fixed to ``'scores'``).

    Returns
    -------
    cand_features : np.ndarray of float32
        Candidate states from all files, concatenated along axis 0.
    n_cands_per_sample : np.ndarray of int32
        Number of candidates contributed by each file, in order.
    cand_choices : np.ndarray of int32
        One chosen-candidate entry per file.
    cand_scoress : np.ndarray of float32
        Candidate scores from all files, concatenated along axis 0.
    """
    cand_features = []
    cand_choices = []
    cand_scoress = []
    # Note: the enumerate index was unused in the original loop.
    for filename in sample_files:
        cand_states, cand_scores, cand_choice = utilities.load_flat_samples(
            filename, feats_type, 'scores', augment_feats, normalize_feats)
        cand_features.append(cand_states)
        cand_choices.append(cand_choice)
        cand_scoress.append(cand_scores)
    n_cands_per_sample = [v.shape[0] for v in cand_features]
    # astype(..., copy=False) avoids a second copy when dtype already matches.
    cand_features = np.concatenate(cand_features, axis=0).astype(np.float32, copy=False)
    cand_choices = np.asarray(cand_choices).astype(np.int32, copy=False)
    cand_scoress = np.concatenate(cand_scoress, axis=0).astype(np.float32, copy=False)
    n_cands_per_sample = np.asarray(n_cands_per_sample).astype(np.int32, copy=False)
    return cand_features, n_cands_per_sample, cand_choices, cand_scoress