Example #1
def clustering():
    """
    Clustering all unlabeled texts
    """
    data = prepare_tfidf_data()
    SSE = []
    SilhouetteScores = []
    Labels = []
    with open('data/online_test/clustering_result_%s.txt' % get_time(),
              'w') as f:
        for k in range(2, 21):
            print('\nClustering, n_cluster = %d' % k)
            f.write('\nClustering, n_cluster = %d\n' % k)
            model = KMeans(n_clusters=k, init='k-means++', n_jobs=-1)
            labels = model.fit(data).labels_

            print(' model is trained, trying to compute silhouette score')
            score = metrics.silhouette_score(data, labels)
            Labels.append(labels)
            SilhouetteScores.append(score)
            SSE.append(model.inertia_)
            print('SSE: {}\n'.format(SSE))
            print('SilhouetteScores: {}\n'.format(SilhouetteScores))
            f.write(' SSE: {}\n SilhouetteScores: {}\n'.format(
                SSE, SilhouetteScores))

    all_records = {'SSE': SSE, 'scores': SilhouetteScores, 'labels': Labels}
    save_pkl(all_records, 'data/online_test/clustering_records.pkl')
    # x = np.arange(1, 11)
    # plt.plot(x, SSE, 'o-')
    # plt.xlabel('k')
    # plt.ylabel('SSE')
    # plt.show()
    # plt.savefig('data/kmeans_clustering.pdf', format='pdf')
    print('Success!')
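Every example on this page funnels through a project-specific save_pkl helper (usually paired with a load_pkl counterpart), and the calling convention is not uniform: some projects pass the object first and the path second (Examples #1 and #3), some pass the path first (Examples #6 and #14 below), Example #10 uses keyword arguments, and Examples #24 and #25 hand in an already-open file handle. The helpers themselves are not shown in any snippet here; a minimal sketch of what they typically look like, assuming the object-first signature used in Example #1, is:

import pickle


def save_pkl(obj, path):
    # Minimal sketch (an assumption, not taken from any of the projects above):
    # serialize obj to path with pickle.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)


def load_pkl(path):
    # Counterpart sketch: read a pickled object back from path.
    with open(path, 'rb') as f:
        return pickle.load(f)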
Example #2
 def from_config(cls, validation_ratio, validation_seed, dir_path, data_sampling_freq,
                 start_sampling_freq, end_sampling_freq, start_seq_len, num_channels, return_long):
     assert end_sampling_freq <= data_sampling_freq
     target_location = os.path.join(dir_path, '{}c_{}v_{}ss_{}es_{}l.npz'.format(num_channels, DATASET_VERSION,
                                                                                 start_sampling_freq,
                                                                                 end_sampling_freq, start_seq_len))
     if os.path.exists(target_location):
         print('loading dataset from file: {}'.format(target_location))
         given_data = np.load(target_location)
         given_data = [load_pkl(target_location + '.pkl'), [given_data['arr_{}'.format(i)]
                                                            for i in range(len(given_data.keys()))]]
     else:
         print('creating dataset from scratch')
         dataset = cls(None, dir_path, data_sampling_freq, start_sampling_freq,
                       end_sampling_freq, start_seq_len, num_channels, return_long)
         np.savez_compressed(target_location, *dataset.datas)
         save_pkl(target_location + '.pkl', dataset.data_pointers)
         given_data = [dataset.data_pointers, dataset.datas]
     return_datasets = []
     for i in range(2):
         return_datasets.append(cls(given_data, dir_path, data_sampling_freq, start_sampling_freq,
                                    end_sampling_freq, start_seq_len, num_channels, return_long))
     data_pointers = [x for x in return_datasets[0].data_pointers]
     np.random.seed(validation_seed)
     np.random.shuffle(data_pointers)
     return_datasets[0].data_pointers = data_pointers[:int((1 - validation_ratio) * len(data_pointers))]
     return_datasets[1].data_pointers = data_pointers[int((1 - validation_ratio) * len(data_pointers)):]
     return return_datasets[0], return_datasets[1]
Example #3
    def save_weight_to_pkl(self):
        if not os.path.exists(self.weight_dir):
            os.makedirs(self.weight_dir)

        for name in self.w.keys():
            save_pkl(self.w[name].eval(),
                     os.path.join(self.weight_dir, "%s.pkl" % name))
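Example #3 above pickles each TensorFlow variable in self.w by evaluating it in the active session. The snippet does not include the matching restore step; a rough sketch of one, assuming a TF1-style self.sess attribute and the same load_pkl helper (both assumptions, not shown in the source), might be:

    def load_weight_from_pkl(self):
        # Hypothetical inverse of save_weight_to_pkl: read each pickled array
        # back and assign it to the corresponding variable in the session.
        for name in self.w.keys():
            value = load_pkl(os.path.join(self.weight_dir, "%s.pkl" % name))
            self.sess.run(self.w[name].assign(value))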
Example #4
    def getPersons(self):
        for video_id in range(1, self.num_videos + 1):
            self.joints_dict = {}
            video_folder = os.path.join(self.dataset_folder,
                                        'seq' + '%02d' % video_id)
            video_id = str(video_id)
            annotation_file = os.path.join(video_folder, 'annotations_new.txt')
            lines = open(annotation_file).readlines()
            img_list = sorted(glob.glob(os.path.join(video_folder, "*.jpg")))
            #print video_id, len(img_list)

            for line in lines:
                frame_id, rects = self.annotation_parse(line)
                imgs = {}
                if int(frame_id) + 4 <= len(
                        img_list) and int(frame_id) - 5 > 0:
                    print video_id, frame_id
                    clip_list = img_list[int(frame_id) - 6:int(frame_id) + 4]
                    imgs['pre'] = clip_list[:5][::-1]
                    imgs['back'] = clip_list[4:]
                    #print imgs
                    save_path = os.path.join(self.save_folder, video_id,
                                             frame_id)
                    if not (os.path.exists(save_path)):
                        os.makedirs(save_path)
                        # We will track the frames as we load them off of disk
                        self.track(rects, imgs, self.tracker, save_path)
            utils.save_pkl(self.joints_dict,
                           os.path.join(self.save_folder, 'pose_' + video_id))
    def _compute_result(self,
                        model_name,
                        data_loader,
                        target,
                        class_weight,
                        inference_fn,
                        save_name,
                        conditional=False):
        """Load model and compute performance with given inference method"""

        state_dict = torch.load(os.path.join(self.save_path, model_name))
        self.network.load_state_dict(state_dict['model'])
        loss, output, feature = self._test(data_loader)
        if conditional:
            predict = inference_fn(output, target)
        else:
            predict = inference_fn(output)
        per_class_AP = utils.compute_weighted_AP(target, predict, class_weight)
        mAP = utils.compute_mAP(per_class_AP, self.subclass_idx)
        result = {
            'output': output.cpu().numpy(),
            'feature': feature.cpu().numpy(),
            'per_class_AP': per_class_AP,
            'mAP': mAP
        }
        utils.save_pkl(result, os.path.join(self.save_path, save_name))
        return mAP
Example #6
def savePointsDict(path):
    for camera in camera_list:
        ann = ut.load_json(path +
                           "CompleteAnnotations_2016-07-11/%s.json" % camera)
        pointsDict = {dots["imName"]: dots["xy"] for dots in ann["dots"]}
        ut.save_pkl(path + "CompleteAnnotations_2016-07-11/%s.pkl" % camera,
                    pointsDict)
Example #7
def train(args):
    devices = utils.get_devices(args.gpu)
    if args.seed is not None:
        utils.manual_seed(args.seed)

    logging.info("Loading data...")
    dataloader = create_dataloader(args)
    vocab = dataloader.dataset.vocab
    utils.save_pkl(vocab, os.path.join(args.save_dir, "vocab.pkl"))

    logging.info("Initializing training environment...")
    mdl = prepare_model(args, dataloader)
    optimizer_cls = get_optimizer_cls(args)
    trainer = Trainer(model=utils.to_device(mdl, devices),
                      device=devices[0],
                      vocab=vocab,
                      epochs=args.epochs,
                      save_dir=args.save_dir,
                      save_period=args.save_period,
                      optimizer_cls=optimizer_cls,
                      tensor_key="tensor",
                      samples=args.samples,
                      show_progress=args.show_progress,
                      kld_annealing=args.kld_annealing,
                      dynamic_rnn=mdl.encoder.rnn.dynamic
                      or mdl.decoder.rnn.dynamic)
    report_model(trainer)

    logging.info("Commencing training...")
    trainer.train(dataloader)

    logging.info("Done!")
Example #8
    def evaluate(self):
        self.preds = []
        self.targets = []
        self.T = self.num_frames['test']
        HPIM_f_dict, HAIM_f_dict = {}, {}
        with torch.no_grad():
            for i, data in enumerate(self.data_loaders['test']):
                inputs = data[0].cuda()
                labels = data[-2]
                labels = labels.view(-1, *labels.size()[2:])
                labels = torch.split(labels, 1, dim=-1)
                target = labels[1].squeeze()[0]
                img_paths = data[-1]
                # compute output
                _, _, activity_scores, BIM_f, HPIM_f = self.net(inputs, 'test')
                activity_scores = activity_scores.view(self.T, -1).mean(dim=0)
                pred = torch.argmax(activity_scores, -1)
                self.preds.append(pred.cpu())
                self.targets.append(target)
                clip_id = '/'.join(img_paths[0][0].split('/')[4:6])
                #                 print(HPIM_f.size())
                HPIM_f_dict[clip_id] = HPIM_f.cpu()
                #                 HAIM_f_dict[clip_id] = HAIM_f
                if i == 100:
                    utils.save_pkl(HPIM_f_dict, 'HPIM_f_dict')
                    HPIM_f_dict = {}
                    break
#             utils.save_pkl(HPIM_f_dict, 'HPIM_f_dict')
#           utils.save_pkl(HAIM_f_dict, 'HAIM_f_dict')
            self.show()
def extract_candidates(predictions_scan, annotations, tf_matrix, pid, outputs_path):
    print 'computing blobs'
    start_time = time.time()
    blobs = blobs_detection.blob_dog(predictions_scan[0, 0], min_sigma=1, max_sigma=15, threshold=0.1)
    print 'blobs computation time:', (time.time() - start_time) / 60.

    print 'n_blobs detected', len(blobs)
    correct_blobs_idxs = []
    for zyxd in annotations:
        r = zyxd[-1] / 2.
        distance2 = ((zyxd[0] - blobs[:, 0]) ** 2
                     + (zyxd[1] - blobs[:, 1]) ** 2
                     + (zyxd[2] - blobs[:, 2]) ** 2)
        blob_idx = np.argmin(distance2)
        print 'node', zyxd
        print 'closest blob', blobs[blob_idx]
        if distance2[blob_idx] <= r ** 2:
            correct_blobs_idxs.append(blob_idx)
        else:
            print 'not detected !!!'

    # we will save blobs in the voxel space of the original image
    # blobs that are true detections will have blobs[-1] = 1 else 0
    blobs_original_voxel_coords = []
    for j in xrange(blobs.shape[0]):
        blob_j = np.append(blobs[j, :3], [1])
        blob_j_original = tf_matrix.dot(blob_j)
        blob_j_original[-1] = 1 if j in correct_blobs_idxs else 0
        if j in correct_blobs_idxs:
            print 'blob in original', blob_j_original
        blobs_original_voxel_coords.append(blob_j_original)

    blobs = np.asarray(blobs_original_voxel_coords)
    utils.save_pkl(blobs, outputs_path + '/%s.pkl' % pid)
Example #10
def text_to_word(text, save_dir):
    logger = set_logger('word-process')
    sp_text = []
    for i in range(len(text)):
        sp_text.append(text[i].split())

    unq_word = []

    logger.info('Set Dictionary.')

    for line in tqdm(sp_text):
        for word in line:
            if word not in unq_word:
                unq_word.append(word)

    logger.info('# of unique Word : {}\texample : {}'.format(len(unq_word), random.sample(unq_word, 50)))

    all_words = []
    hangul = re.compile('[-=.#/?:^~!$}0-9]')

    for line in tqdm(sp_text):
        for word in line:
            word = hangul.sub('', word)
            if word:
                all_words.append(word)

    word_count = {}
    for word in all_words:
        if word in word_count:
            word_count[word] += 1
        else:
            word_count[word] = 1
    sorted_words = sorted([(k, v) for k, v in word_count.items()],
                          key=lambda word_count: -word_count[1])[:40000]

    label_word = {i + 1: ch[0] for i, ch in enumerate(sorted_words)}
    word_label = {y: x for x, y in label_word.items()}

    x = np.asarray([[word_label[w] for w in sent if w in word_label.keys()] for sent in sp_text])

    y_neg = [[1, 0] for _ in range(45000)]
    y_pos = [[0, 1] for _ in range(45000)]
    y = np.asarray(y_neg + y_pos)

    np.random.seed(618)
    np.random.shuffle(x)
    np.random.seed(618)
    np.random.shuffle(y)

    # Check Folder
    folder_check(dir_path=save_dir, dir_name='npz')
    folder_check(dir_path=save_dir, dir_name='dictionary')

    # Save Array & Dictionary
    save_npz(npz_path=save_dir + '/npz', npz_name='x_word.npz', arr=x)
    save_npz(npz_path=save_dir + '/npz', npz_name='y_word.npz', arr=y)
    save_pkl(pkl_path=save_dir + '/dictionary', pkl_name='dictionary_word.pkl', save_object=label_word)

    return None
Example #11
def make_luna_validation_split():
    luna_path = pathfinder.LUNA_DATA_PATH
    file_list = sorted(glob.glob(luna_path + "/*.mhd"))
    random.seed(317070)
    all_pids = [utils_lung.extract_pid_filename(f) for f in file_list]
    validation_pids = random.sample(all_pids, int(VALIDATION_SET_SIZE * len(file_list)))
    train_pids = list(set(all_pids) - set(validation_pids))
    d = {}
    d['valid'] = validation_pids
    d['train'] = train_pids
    utils.save_pkl(d, pathfinder.LUNA_VALIDATION_SPLIT_PATH)
Example #12
    def save_weights(self, weight_dir):
        """
        Save weights
        """
        print('Saving weights to %s ...' % weight_dir)
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)

        for name in self.w.keys():
            save_pkl(self.w[name].eval(),
                     os.path.join(weight_dir, "%s.pkl" % name))
Example #13
def make_luna_validation_split():
    luna_path = pathfinder.LUNA_DATA_PATH
    file_list = sorted(glob.glob(luna_path + "/*.mhd"))
    random.seed(317070)
    all_pids = [utils_lung.extract_pid_filename(f) for f in file_list]
    validation_pids = random.sample(all_pids, int(VALIDATION_SET_SIZE * len(file_list)))
    train_pids = list(set(all_pids) - set(validation_pids))
    d = {}
    d['valid'] = validation_pids
    d['train'] = train_pids
    utils.save_pkl(d, pathfinder.LUNA_VALIDATION_SPLIT_PATH)
Example #14
def get_svm_model(X_train, y_train, retrain=False):
    from sklearn.svm import SVC

    if not retrain:
        svm_model = load_pkl(SVM_MODEL_PATH)
        if svm_model is not None:
            return svm_model
    print("start training SVM model...")
    svm_model = SVC()
    svm_model.fit(X_train, y_train)
    save_pkl(SVM_MODEL_PATH, svm_model)
    return svm_model
Example #15
def get_mnb_model(X_train, y_train, retrain=False):
    from sklearn.naive_bayes import MultinomialNB

    if not retrain:
        mnb_model = load_pkl(MNB_MODEL_PATH)
        if mnb_model is not None:
            return mnb_model
    print("start training MNB model...")
    mnb_model = MultinomialNB(alpha=1, fit_prior=True)
    mnb_model.fit(X_train, y_train)
    save_pkl(MNB_MODEL_PATH, mnb_model)
    return mnb_model
Example #16
def get_lr_model(X_train, y_train, retrain=False):
    from sklearn.linear_model import LogisticRegression

    if not retrain:
        lr_model = load_pkl(LR_MODEL_PATH)
        if lr_model is not None:
            return lr_model
    print("start training LR model...")
    lr_model = LogisticRegression(max_iter=200, n_jobs=-1)
    lr_model.fit(X_train, y_train)
    save_pkl(LR_MODEL_PATH, lr_model)
    return lr_model
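Examples #14 through #16 repeat the same load-or-train caching pattern: try load_pkl on a fixed path, and only fit and save_pkl a fresh model when nothing usable is cached or retrain is set. The repetition could be factored into a single helper; a sketch, assuming the path-first load_pkl/save_pkl signature these three examples use (the helper itself is hypothetical, not part of the source):

def get_cached_model(model_path, model_factory, X_train, y_train, retrain=False):
    # Hypothetical generalization of get_svm_model / get_mnb_model / get_lr_model:
    # reuse a pickled model when one is available, otherwise train and cache it.
    if not retrain:
        model = load_pkl(model_path)
        if model is not None:
            return model
    model = model_factory()
    model.fit(X_train, y_train)
    save_pkl(model_path, model)
    return model

With such a helper, get_lr_model would reduce to get_cached_model(LR_MODEL_PATH, lambda: LogisticRegression(max_iter=200, n_jobs=-1), X_train, y_train, retrain).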
Example #17
    def dump_features_all_item(self, name):
        if name == 'vali':
            data = self.data.vali_batch
        elif name == 'test':
            data = self.data.test_batch
        elif name == 'train':
            data = self.data.train_batch
        else:
            raise Exception(f'unknown name: {name}')

        users = []
        items = []
        logits = []

        from run_for_fuse import all_res
        fn_list = all_res.keys()

        user2items = None
        for fn in fn_list:
            _users, _items_list, _ = utils.load_pkl(
                f'{utils.for_fuse_dir}/{fn}_{name}')
            if user2items is None:
                user2items = {}
                for _u, _items in zip(_users, _items_list):
                    user2items[_u] = set(_items)
            else:
                assert set(user2items.keys()) == set(_users)
                for _u, _items in zip(_users, _items_list):
                    user2items[_u] |= set(_items)

        pbar = tqdm(desc=f'dump {name}, predicting...', leave=False)
        for pv in self.model.predict(data):
            pbar.update(1)
            users.extend(pv.user.tolist())
            for i in range(len(pv.user)):
                user = pv.user[i]
                _items_i = sorted(user2items[user])
                items.append(_items_i)
                logits.append(pv.all_scores[i, _items_i].tolist())

        pbar.close()

        feat = [users, items, logits]

        fn = f'{utils.for_fuse_dir}/union_{args.msg}_{name}'

        print(f'{utils.get_time_str()} dump file {fn}')
        utils.save_pkl(feat, fn)
        print(f'{utils.get_time_str()} dump file {fn} over')

        return fn
Example #18
 def OnSave(self, event):
     """
     Save labeled data
     """
     if len(self.cur_text['sents']) > 0:
         if self.cur_text['text_ind'] not in self.agent.history_texts:
             self.data.append(self.cur_text['sents'])
             self.agent.history_texts.append(self.cur_text['text_ind'])
         else:
             print('\nText %d already exists and will be replaced.\n' %
                   self.cur_text['text_ind'])
             ind = self.agent.history_texts.index(self.cur_text['text_ind'])
             self.data[ind] = self.cur_text['sents']
     print('Saving data to %s' % self.output_file_name)
     save_pkl([self.agent.history_texts, self.data], self.output_file_name)
Example #19
def save_train_validation_ids(filename, data_path):
    patient_dirs = sorted(glob.glob(data_path + "/*/study/"),
                          key=lambda folder: int(re.search(r'/(\d+)/', folder).group(1)))
    dirs_indices = range(0, len(patient_dirs))

    valid_dirs_indices = get_cross_validation_indices(indices=dirs_indices, validation_index=0)
    train_patient_indices = list(set(dirs_indices) - set(valid_dirs_indices))

    train_patient_dirs = [utils.get_patient_id(patient_dirs[idx]) for idx in train_patient_indices]
    validation_patient_dirs = [utils.get_patient_id(patient_dirs[idx]) for idx in valid_dirs_indices]

    d = {'train': train_patient_dirs, 'valid': validation_patient_dirs}
    utils.save_pkl(d, filename)
    print 'train-valid patients split saved to', filename
    return d
Example #20
 def OnQuit(self, event):
     """
     Quit the APP
     """
     dlg = wx.TextEntryDialog(
         None,
         "Enter a name for saving the annotation data and press 'OK' ('Cancel' for no saving)!",
         'Message Window', self.output_file_name, wx.OK | wx.CANCEL)
     if dlg.ShowModal() == wx.ID_OK:
         filename = dlg.GetValue()
         if filename:
             save_pkl([self.agent.history_texts, self.data], filename)
     #else:
     #    filename = 'data/online_test/labeled_data%s' % get_time()
     #    save_pkl([self.agent.history_texts, self.data], filename)
     wx.Exit()
Example #21
def extract_candidates(predictions_scan, tf_matrix, pid, outputs_path):
    print 'computing blobs'
    start_time = time.time()
    blobs = blobs_detection.blob_dog(predictions_scan[0, 0], min_sigma=1, max_sigma=15, threshold=0.1)
    print 'blobs computation time:', (time.time() - start_time) / 60.
    print 'n blobs detected:', blobs.shape[0]

    blobs_original_voxel_coords = []
    for j in xrange(blobs.shape[0]):
        blob_j = np.append(blobs[j, :3], [1])
        blob_j_original = tf_matrix.dot(blob_j)
        blobs_original_voxel_coords.append(blob_j_original)

    blobs = np.asarray(blobs_original_voxel_coords)
    print blobs.shape
    utils.save_pkl(blobs, outputs_path + '/%s.pkl' % pid)
Example #22
def extract_candidates(predictions_scan, tf_matrix, pid, outputs_path):
    print 'computing blobs'
    start_time = time.time()
    blobs = blobs_detection.blob_dog(predictions_scan[0, 0], min_sigma=1, max_sigma=15, threshold=0.1)
    print 'blobs computation time:', (time.time() - start_time) / 60.
    print 'n blobs detected:', blobs.shape[0]

    blobs_original_voxel_coords = []
    for j in xrange(blobs.shape[0]):
        blob_j = np.append(blobs[j, :3], [1])
        blob_j_original = tf_matrix.dot(blob_j)
        blobs_original_voxel_coords.append(blob_j_original)

    blobs = np.asarray(blobs_original_voxel_coords)
    print blobs.shape
    utils.save_pkl(blobs, outputs_path + '/%s.pkl' % pid)
    def test(self):
        # Test and save the result
        test_color_result = self._test(self.test_color_loader)
        test_gray_result = self._test(self.test_gray_loader)
        utils.save_pkl(test_color_result,
                       os.path.join(self.save_path, 'test_color_result.pkl'))
        utils.save_pkl(test_gray_result,
                       os.path.join(self.save_path, 'test_gray_result.pkl'))

        # Output the classification accuracy on test set
        info = ('Test on color images accuracy: {}, domain accuracy: {}\n'
                'Test on gray images accuracy: {}, domain accuracy: {}'.format(
                    test_color_result['class_accuracy'],
                    test_color_result['domain_accuracy'],
                    test_gray_result['class_accuracy'],
                    test_gray_result['domain_accuracy']))
        utils.write_info(os.path.join(self.save_path, 'test_result.txt'), info)
Example #24
 def _load_json_emb(self):
     fn = get_save_path() + '/{}_graph2vec_json_dict.pkl'.format(
         self.dataset)
     if isfile(fn):
         with open(fn, 'rb') as handle:
             d = load_pkl(handle)
             print('Loaded json dict from {}'.format(fn))
             return d
     dfn = get_model_path(
     ) + '/graph2vec_tf/embeddings/{}_train_test_dims_{}_epochs_1000_lr_0.3_embeddings.txt'.format(
         self.dataset, self.dim)
     with open(dfn) as json_data:
         d = json.load(json_data)
     with open(fn, 'wb') as handle:
         save_pkl(d, handle)
         print('Loaded json dict from {}\nSaved to {}'.format(dfn, fn))
     return d
Example #25
 def _load_sim_mat(self):
     fn = get_result_path() + '/{}/sim/{}_graph2vec_dim_{}_sim_{}.npy'.format( \
         self.dataset, self.dataset, self.dim, self.sim)
     if isfile(fn):
         with open(fn, 'rb') as handle:
             sim_mat = load_pkl(handle)
             print('Loaded sim mat from {}'.format(fn))
             return sim_mat
     train_emb = self._load_emb(True)
     test_emb = self._load_emb(False)
     if self.sim == 'dot':
         sim_mat = test_emb.dot(train_emb.T)
     else:
         raise RuntimeError('Unknown sim {}'.format(self.sim))
     with open(fn, 'wb') as handle:
         save_pkl(sim_mat, handle)
         print('Saved sim mat {} to {}'.format(sim_mat.shape, fn))
     return sim_mat
Example #26
def prepare_tfidf_data():
    """
    Compute tfidf features for texts
    """
    filename = 'data/online_test/tf_idf_data_max_feature20000.pkl'
    if os.path.exists(filename):
        print('Loading data from %s' % filename)
        return load_pkl(filename)
    else:
        texts = load_pkl('data/home_and_garden_500_words_with_title.pkl')
        print('Vectorizing texts ...')
        converter = TfidfVectorizer(decode_error='ignore',
                                    stop_words='english',
                                    max_features=20000)
        tf_idf = converter.fit_transform([' '.join(t['sent']) for t in texts])
        print('n_samples: %d  n_features: %d' % tf_idf.shape)
        save_pkl(tf_idf, filename)
        return tf_idf
Example #27
def test2():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    luna_data_paths = utils_lung.get_patient_data_paths(pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]
    print len(luna_data_paths)
    pid2mm_shape = {}

    for k, p in enumerate(luna_data_paths):
        img, origin, spacing = utils_lung.read_mhd(p)
        id = os.path.basename(p).replace('.mhd', '')
        mm_shape = img.shape * spacing
        pid2mm_shape[id] = mm_shape
        print k, id, mm_shape
        if k % 50 == 0:
            print 'Saved'
            utils.save_pkl(pid2mm_shape, image_dir + '/pid2mm.pkl')

    utils.save_pkl(pid2mm_shape, image_dir + '/pid2mm.pkl')
    def test(self):
        # Test and save the result
        state_dict = torch.load(os.path.join(self.save_path, 'best.pth'))
        self.load_state_dict(state_dict)

        dev_class_loss, dev_domain_loss, dev_class_output, dev_domain_output, \
            dev_feature, dev_domain_accuracy = self._test(self.dev_loader)
        dev_predict_prob = self.inference(dev_class_output)
        dev_per_class_AP = utils.compute_weighted_AP(self.dev_target,
                                                     dev_predict_prob,
                                                     self.dev_class_weight)
        dev_mAP = utils.compute_mAP(dev_per_class_AP, self.subclass_idx)
        dev_result = {
            'output': dev_class_output.cpu().numpy(),
            'feature': dev_feature.cpu().numpy(),
            'per_class_AP': dev_per_class_AP,
            'mAP': dev_mAP,
            'domain_output': dev_domain_output.cpu().numpy(),
            'domain_accuracy': dev_domain_accuracy
        }
        utils.save_pkl(dev_result,
                       os.path.join(self.save_path, 'dev_result.pkl'))

        test_class_loss, test_domain_loss, test_class_output, test_domain_output, \
            test_feature, test_domain_accuracy = self._test(self.test_loader)
        test_predict_prob = self.inference(test_class_output)
        test_per_class_AP = utils.compute_weighted_AP(self.test_target,
                                                      test_predict_prob,
                                                      self.test_class_weight)
        test_mAP = utils.compute_mAP(test_per_class_AP, self.subclass_idx)
        test_result = {
            'output': test_class_output.cpu().numpy(),
            'feature': test_feature.cpu().numpy(),
            'per_class_AP': test_per_class_AP,
            'mAP': test_mAP,
            'domain_output': test_domain_output.cpu().numpy(),
            'domain_accuracy': test_domain_accuracy
        }
        utils.save_pkl(test_result,
                       os.path.join(self.save_path, 'test_result.pkl'))

        # Output the mean AP for the best model on dev and test set
        info = ('Dev mAP: {}\n' 'Test mAP: {}'.format(dev_mAP, test_mAP))
        utils.write_info(os.path.join(self.save_path, 'result.txt'), info)
def test2():
    image_dir = utils.get_dir_path('analysis', pathfinder.METADATA_PATH)
    luna_data_paths = utils_lung.get_patient_data_paths(
        pathfinder.LUNA_DATA_PATH)
    luna_data_paths = [p for p in luna_data_paths if '.mhd' in p]
    print len(luna_data_paths)
    pid2mm_shape = {}

    for k, p in enumerate(luna_data_paths):
        img, origin, spacing = utils_lung.read_mhd(p)
        id = os.path.basename(p).replace('.mhd', '')
        mm_shape = img.shape * spacing
        pid2mm_shape[id] = mm_shape
        print k, id, mm_shape
        if k % 50 == 0:
            print 'Saved'
            utils.save_pkl(pid2mm_shape, image_dir + '/pid2mm.pkl')

    utils.save_pkl(pid2mm_shape, image_dir + '/pid2mm.pkl')
Example #30
def text_to_phoneme(text, save_dir):
    logger = set_logger('phoneme-process')
    sp_text = []
    hangul = re.compile('[^\u3131-\u3163\uac00-\ud7a3]+')
    for split in text:
        review = hangul.sub(' ', split_syllables(split))
        if len(review) != 0:
            sp_text.append(review)

    unq_phoneme = []
    logger.info('Set Dictionary.')

    for line in tqdm(sp_text):
        for phoneme in line:
            if phoneme not in unq_phoneme:
                unq_phoneme.append(phoneme)

    logger.info('# of unique Phoneme : {}\nexample : {}'.format(len(unq_phoneme), unq_phoneme[:50]))

    phoneme_label = {ch: i + 1 for i, ch in enumerate(unq_phoneme)}
    label_phoneme = {i + 1: ch for i, ch in enumerate(unq_phoneme)}

    x = np.asarray([[phoneme_label[w] for w in sent if w in phoneme_label.keys()] for sent in sp_text])
    y_neg = [[1, 0] for _ in range(45000)]
    y_pos = [[0, 1] for _ in range(45000)]
    y = np.asarray(y_neg + y_pos)

    np.random.seed(618)
    np.random.shuffle(x)
    np.random.seed(618)
    np.random.shuffle(y)

    # Check Folder
    folder_check(dir_path=save_dir, dir_name='npz')
    folder_check(dir_path=save_dir, dir_name='dictionary')

    # Save Array & Dictionary
    save_npz(npz_path=save_dir + '/npz', npz_name='x_phoneme.npz', arr=x)
    save_npz(npz_path=save_dir + '/npz', npz_name='y_phoneme.npz', arr=y)
    save_pkl(pkl_path=save_dir + '/dictionary', pkl_name='dictionary_phoneme.pkl', save_object=label_phoneme)

    return None
Example #31
    def train_and_predict(self,
                          features,
                          target='label',
                          n_folds=5,
                          save=True,
                          seed=2019):
        self.fe = features
        X_train, y_train = self.train_[features].values, self.train_[
            target].values.astype('uint8')
        X_test = self.test_[self.fe].values

        skf = StratifiedKFold(n_splits=n_folds,
                              shuffle=True,
                              random_state=seed)
        self.pred = np.zeros(len(X_test))

        # K-fold training
        for fold_, (train_idx,
                    val_idx) in enumerate(skf.split(X_train, y_train)):
            train_X, train_y = X_train[train_idx], y_train[train_idx]
            eval_X, eval_y = X_train[val_idx], y_train[val_idx]

            lgb_train = lgb.Dataset(train_X, train_y)
            lgb_eval = lgb.Dataset(eval_X, eval_y)

            print('\nFold: ', fold_)
            model = lgb.train(self.params,
                              lgb_train,
                              num_boost_round=7000,
                              valid_sets=[lgb_train, lgb_eval],
                              valid_names=['train', 'eval'],
                              early_stopping_rounds=200,
                              verbose_eval=1000)

            if save:
                save_pkl(
                    model,
                    os.path.join(self.opt['model_train'],
                                 f'lgb_fold_{fold_}.pkl'))

            self.pred += model.predict(X_test) / n_folds
        return self.pred
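Example #31 saves one LightGBM booster per cross-validation fold and averages their test predictions on the fly. The per-fold pickles also make it possible to rerun inference later without retraining; a small consumer sketch, assuming a load_pkl counterpart to save_pkl and the lgb_fold_{fold}.pkl naming used above (this helper is hypothetical, not from the source):

import os

import numpy as np


def predict_from_saved_folds(X, model_dir, n_folds=5):
    # Hypothetical inference helper: load each fold's pickled LightGBM booster
    # and average the per-fold predictions, mirroring train_and_predict above.
    pred = np.zeros(len(X))
    for fold_ in range(n_folds):
        # load_pkl is assumed to come from the same project utils as save_pkl
        model = load_pkl(os.path.join(model_dir, f'lgb_fold_{fold_}.pkl'))
        pred += model.predict(X) / n_folds
    return pred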
Example #32
def text_classification():
    """
    Classify all unlabeled texts according to the clustering results 
    """
    filename = 'data/online_test/label2text.pkl'
    if os.path.exists(filename):
        label2text = load_pkl(filename)
    else:
        records = load_pkl('data/online_test/clustering_records.pkl')
        scores = records['scores']
        best_i = scores.index(max(scores[:10]))  # maximum categories = 11
        labels = records['labels'][best_i]
        best_k = best_i + 2  # k in [2, 20], i in [0, 18]
        label2text = {c: [] for c in range(best_k)}
        for ind, label in enumerate(labels):
            label2text[label].append(ind)
        save_pkl(label2text, filename)

    for label, text_inds in label2text.items():
        print('label: {}  n_samples: {}'.format(label, len(text_inds)))
    return label2text
Example #33
    def __init__(self,
                 root="",
                 split=None,
                 transform_function=None,
                 sigma=8.0,
                 version=None,
                 ratio=0.3):

        self.split = split
        self.sigma = sigma

        self.name = "Penguins"
        self.path = "Penguins/"

        imgNames = ut.load_pkl(self.path + "/Splits_2016_07_11/%s_%s.pkl" %
                               (split, version))

        self.imgNames = imgNames
        path_points = (self.path + "/Splits_2016_07_11/points_%s_%s.pkl" %
                       (split, version))

        if not os.path.exists(path_points):
            allPointDict = save_pointsDict(self.path, self.imgNames)
            ut.save_pkl(path_points, allPointDict)

        self.pointsDict = ut.load_pkl(path_points)

        assert len(self.pointsDict) == len(self)

        self.split = split
        self.ratio = ratio
        self.n_classes = 2
        self.n_channels = 3
        self.transform_function = transform_function()
        self.version = version

        roi_name = self.path + "CompleteAnnotations_2016-07-11/roi.pkl"

        self.roiDict = ut.load_pkl(roi_name)
        self.longest = True
Example #34
def save_train_validation_ids(filename, data_path):
    patient_dirs = sorted(
        glob.glob(data_path + "/*/study/"),
        key=lambda folder: int(re.search(r'/(\d+)/', folder).group(1)))
    dirs_indices = list(range(0, len(patient_dirs)))

    valid_dirs_indices = get_cross_validation_indices(indices=dirs_indices,
                                                      validation_index=0)
    train_patient_indices = list(set(dirs_indices) - set(valid_dirs_indices))

    train_patient_dirs = [
        utils.get_patient_id(patient_dirs[idx])
        for idx in train_patient_indices
    ]
    validation_patient_dirs = [
        utils.get_patient_id(patient_dirs[idx]) for idx in valid_dirs_indices
    ]

    d = {'train': train_patient_dirs, 'valid': validation_patient_dirs}
    utils.save_pkl(d, filename)
    print('train-valid patients split saved to', filename)
    return d
        assert patient_data["patient"] == patient_id
        patient_data["systole"] = np.concatenate((patient_data["systole"], systole_prediction[None, :]), axis=0)
        patient_data["diastole"] = np.concatenate((patient_data["diastole"], diastole_prediction[None, :]), axis=0)

avg_patient_predictions = config().get_avg_patient_predictions(batch_predictions, batch_ids, mean=mean)
patient_targets = utils_heart.get_patient_average_heaviside_predictions(batch_targets, batch_ids, mean=mean)

assert avg_patient_predictions.viewkeys() == patient_targets.viewkeys()
crpss_sys, crpss_dst = [], []
for id in avg_patient_predictions.iterkeys():
    crpss_sys.append(utils_heart.crps(avg_patient_predictions[id][0], patient_targets[id][0]))
    crpss_dst.append(utils_heart.crps(avg_patient_predictions[id][1], patient_targets[id][1]))

crps0, crps1 = np.mean(crpss_sys), np.mean(crpss_dst)
print "\nValidation CRPS: ", crps0, crps1, 0.5 * (crps0 + crps1)
utils.save_pkl(avg_patient_predictions, valid_prediction_path)
print " validation predictions saved to %s" % valid_prediction_path
print

# test
test_data_iterator = config().test_data_iterator
if n_tta_iterations == 1:
    test_data_iterator.transformation_params = config().valid_transformation_params
else:
    # just to be sure
    test_data_iterator.transformation_params["zoom_range"] = (1.0, 1.0)

print "test transformation params"
print test_data_iterator.transformation_params

print "n test: %d" % test_data_iterator.nsamples
Example #36
candidates_config = 'dsb_c3_s5_p8a1'
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
candidates_path = predictions_dir + '/%s' % candidates_config
id2candidates_path = utils_lung.get_candidates_paths(candidates_path)

train_valid_ids = utils.load_pkl(pathfinder.VALIDATION_SPLIT_PATH)
train_pids, valid_pids, test_pids = train_valid_ids['training'], train_valid_ids['validation'], train_valid_ids['test']
all_pids = train_pids + valid_pids + test_pids

data_iterator = data_iterators.DSBPixelSpacingsGenerator(pathfinder.DATA_PATH, id2candidates_path, all_pids)


z = []
y = []
x = []
pixel_spacings = {}

# use buffering.buffered_gen_threaded()
for idx, (pid, pixel_spacing) in enumerate(data_iterator.generate()):
    print idx, pid, pixel_spacing
    z.append(pixel_spacing[0])
    y.append(pixel_spacing[1])
    x.append(pixel_spacing[2])
    pixel_spacings[pid] = pixel_spacing


utils.save_pkl(pixel_spacings, 'pixel_spacings_dsb.pkl')
print 'z', min(z), max(z)
print 'y', min(y), max(y)
print 'x', min(x), max(x)
Example #37
def get_slice2roi(data_path, out_filename, num_circles=None, plot=False):
    patient_paths = sorted(glob.glob(data_path + '/*/study'))
    slice2roi = {}
    for p in patient_paths:
        patient_data = get_patient_data(p)

        # sax slices
        sax_slice_stack = []
        ch4, ch2 = None, None
        for s in patient_data:
            if 'sax' in s['slice_id']:
                sax_slice_stack.append(s)
            elif '4ch' in s['slice_id']:
                ch4 = s
            elif '2ch' in s['slice_id']:
                ch2 = s

        sorted_sax_slices = sort_sax_slices(sax_slice_stack)
        grouped_sax_slices = group_sax_slices(sorted_sax_slices)
        # init patient dict
        pid = sorted_sax_slices[0]['patient_id']
        # print pid
        slice2roi[pid] = {}

        for slice_group in grouped_sax_slices:
            # pixel spacing changes within one patient but not too much
            pixel_spacing = slice_group[0]['metadata']['PixelSpacing'][0]
            if num_circles:
                roi_center, roi_radii = data.extract_roi(slice_group, pixel_spacing, num_circles=num_circles)
            else:
                roi_center, roi_radii = data.extract_roi(slice_group, pixel_spacing)

            if plot:
                plot_roi(slice_group, roi_center, roi_radii)

            for s in slice_group:
                sid = s['slice_id']
                slice2roi[pid][sid] = {'roi_center': roi_center, 'roi_radii': roi_radii}

        # project found roi_centers on the 4ch and 2ch slice
        ch4_centers = []
        ch2_centers = []
        for slice in sorted_sax_slices:
            sid = slice['slice_id']
            roi_center = slice2roi[pid][sid]['roi_center']

            metadata_source = slice['metadata']
            hough_roi_center = (float(roi_center[0]) / metadata_source['Rows'],
                                float(roi_center[1]) / metadata_source['Columns'])
            if ch4 is not None:
                metadata_target = ch4['metadata']
                result = orthogonal_projection_on_slice(hough_roi_center, metadata_source, metadata_target)
                ch_roi_center = [float(result[0]) * metadata_target['Rows'],
                                 float(result[1]) * metadata_target['Columns']]
                ch4_centers.append(ch_roi_center)

            if ch2 is not None:
                metadata_target = ch2['metadata']
                result = orthogonal_projection_on_slice(hough_roi_center, metadata_source, metadata_target)
                ch_roi_center = [float(result[0]) * metadata_target['Rows'],
                                 float(result[1]) * metadata_target['Columns']]
                ch2_centers.append(ch_roi_center)

        if ch4 is not None:
            centers = np.array(ch4_centers)
            ch4_result_center = tuple(np.mean(centers, axis=0))
            ch4_result_radius = np.max(np.sqrt((centers - ch4_result_center) ** 2))
            ch4_result_radius = (ch4_result_radius, ch4_result_radius)
            sid = ch4['slice_id']
            slice2roi[pid][sid] = {'roi_center': ch4_result_center,
                                   'roi_radii': ch4_result_radius}
            if plot:
                plot_roi([ch4], ch4_result_center, ch4_result_radius)

        if ch2 is not None:
            centers = np.array(ch2_centers)
            ch2_result_center = tuple(np.mean(centers, axis=0))
            ch2_result_radius = np.max(np.sqrt((centers - ch2_result_center) ** 2))
            ch2_result_radius = (ch2_result_radius, ch2_result_radius)
            sid = ch2['slice_id']
            slice2roi[pid][sid] = {'roi_center': ch2_result_center,
                                   'roi_radii': ch2_result_radius}

            if plot:
                plot_roi([ch2], ch2_result_center, ch2_result_radius)

    utils.save_pkl(slice2roi, out_filename)
    print 'saved to ', out_filename
    return slice2roi
Example #38
final_train = []
final_pos_train = []
final_neg_train = []
for pid in all_pids:
	if pid not in final_test:
		final_train.append(pid)
		if id2label[pid] == 1:
			final_pos_train.append(pid)
		elif id2label[pid] == 0:
			final_neg_train.append(pid)
		else:
			raise ValueError("weird shit is going down")



print 'pos id ratio final train set', 1.*len(final_pos_train) / (len(final_train))	
print 'final test/(train+test):', 1.*len(final_test) / (len(final_train) + len(final_test))

concat_str = ''.join(final_test)
print 'md5 of concatenated pids:', hashlib.md5(concat_str).hexdigest()

output = {'train':final_train, 'test':final_test}
output_name = pathfinder.METADATA_PATH+'final_split.pkl'
utils.save_pkl(output, output_name)
print 'final split saved at ', output_name





Example #39
print
print 'Data'
print 'n samples: %d' % data_iterator.nsamples

prev_pid = None
candidates = []
patients_count = 0
max_malignancy = 0.
for n, (x, candidate_zyxd, id) in enumerate(data_iterator.generate()):
    pid = id[0]

    if pid != prev_pid and prev_pid is not None:
        print patients_count, prev_pid, len(candidates)
        candidates = np.asarray(candidates)
        utils.save_pkl(candidates, outputs_path + '/%s.pkl' % prev_pid)
        patients_count += 1
        candidates = []

    #print 'x.shape', x.shape
    x_shared.set_value(x)
    predictions = get_predictions_patch()
    #print 'predictions.shape', predictions.shape
    #print 'candidate_zyxd', candidate_zyxd.shape

    candidate_zyxd_pred = np.append(candidate_zyxd, [predictions])
    #print 'candidate_zyxd_pred', candidate_zyxd_pred
    candidates.append(candidate_zyxd_pred)

    prev_pid = pid
Example #40
            print '%d/%d (epoch %.3f) time/nvalid_iter=%.2fs' % (
                niter, max_niter, niter / float(train_batches_per_epoch), current_time - prev_time)
            prev_time = current_time

            valid_loss = eval_valid()
            losses_eval_valid.append(valid_loss)
            eval_train_loss = eval_valid()
            losses_eval_train.append(eval_train_loss)
            if 'vae' in config.__name__:
                print 'validation loss: %0.4f KL: %0.4f CE: %0.4f' % (valid_loss[0], -valid_loss[1], valid_loss[2])
                print 'train loss: %0.4f KL: %0.4f CE: %0.4f' % (
                    eval_train_loss[0], -eval_train_loss[1], eval_train_loss[2])
            else:
                print 'validation loss: %0.4f' % valid_loss[0]
                print 'train loss: %0.4f' % eval_train_loss[0]
            print

    if (epoch + 1) % config.save_every == 0:
        d = {
            'configuration': config_name,
            'experiment_id': expid,
            'epochs_since_start': epoch,
            'losses_train': losses_train,
            'losses_eval_valid': losses_eval_valid,
            'losses_eval_train': losses_eval_train,
            'param_values': lasagne.layers.get_all_param_values(model.l_out)
        }
        utils.save_pkl(d, metadata_target_path)
        print "  saved to %s" % metadata_target_path
        print
Example #41
  def save_weight_to_pkl(self):
    if not os.path.exists(self.weight_dir):
      os.makedirs(self.weight_dir)

    for name in self.w.keys():
      save_pkl(self.w[name].eval(), os.path.join(self.weight_dir, "%s.pkl" % name))
Example #42
    avg_patient_predictions = config().get_avg_patient_predictions(batch_predictions, batch_ids, mean=mean)
    patient_targets = utils_heart.get_patient_average_heaviside_predictions(batch_targets, batch_ids)

    assert avg_patient_predictions.viewkeys() == patient_targets.viewkeys()
    crpss_sys, crpss_dst = [], []
    for id in avg_patient_predictions.iterkeys():
        crpss_sys.append(utils_heart.crps(avg_patient_predictions[id][0], patient_targets[id][0]))
        crpss_dst.append(utils_heart.crps(avg_patient_predictions[id][1], patient_targets[id][1]))
        print id, 0.5 * (crpss_sys[-1] + crpss_dst[-1]), crpss_sys[-1], crpss_dst[-1]

    crps0, crps1 = np.mean(crpss_sys), np.mean(crpss_dst)

    print '\n Train CRPS:', config().get_mean_crps_loss(batch_predictions, batch_targets, batch_ids)
    print 'Train CRPS', 0.5 * (crps0 + crps1)

    utils.save_pkl(avg_patient_predictions, prediction_path)
    print ' predictions saved to %s' % prediction_path
    print

if set == 'valid':
    valid_data_iterator = config().valid_data_iterator
    if n_tta_iterations > 1:
        valid_data_iterator.transformation_params = config().train_transformation_params
        valid_data_iterator.transformation_params['zoom_range'] = (1., 1.)

    print
    print 'n valid: %d' % valid_data_iterator.nsamples
    print 'tta iteration:',

    batch_predictions, batch_targets, batch_ids = [], [], []
    for i in xrange(n_tta_iterations):
Example #43
print 'Data'
print 'n samples: %d' % data_iterator.nsamples

prev_pid = None
candidates = []
patients_count = 0
max_malignancy = 0.
for n, (x, candidate_zyxd, id) in enumerate(data_iterator.generate()):
    pid = id[0]

    if pid != prev_pid and prev_pid is not None:
        print patients_count, prev_pid, len(candidates)
        candidates = np.asarray(candidates)
        a = np.asarray(sorted(candidates, key=lambda x: x[-1], reverse=True))
        print 'max malignancies', a[:10,-1]
        utils.save_pkl(a, outputs_path + '/%s.pkl' % prev_pid)
        print 'saved predictions'
        patients_count += 1
        candidates = []

    x_shared.set_value(x)
    predictions = get_predictions_patch()
    #print 'predictions.shape', predictions.shape
    total_malignancy = np.sum(config().malignancy_weights*predictions)

    #print 'total_malignancy', total_malignancy
    #print 'candidate_zyxd', candidate_zyxd
    candidate_zyxd_pred = np.append(candidate_zyxd, [predictions])
    candidate_zyxd_pred_mal = np.append(candidate_zyxd_pred, [[total_malignancy]])
    candidates.append(candidate_zyxd_pred_mal)
Example #44
print 'n samples: %d' % data_iterator.nsamples

prev_pid = None
candidates = []
patients_count = 0
patch_size = 48
stride = 16
for n, (x, id) in enumerate(data_iterator.generate()):
    pid = id

    print(pid)
    print model.l_out.output_shape
    predictions = np.empty(((x.shape[2]-patch_size+1)//stride, (x.shape[3]-patch_size+1)//stride, (x.shape[4]-patch_size+1)//stride,) + (model.l_out.output_shape[1],))
    print predictions.shape
    print 'x.shape', x.shape
    for idxi, i in enumerate(np.arange(0,x.shape[2]-patch_size,stride)):
        print 'slice idxi', idxi
        for idxj, j in enumerate(np.arange(0,x.shape[3]-patch_size,stride)):
            for idxk, k in enumerate(np.arange(0,x.shape[4]-patch_size,stride)):
                #print i, j, k, '|', idxi, idxj, idxk, x.shape[4], x.shape[4]-patch_size+1
                x_in = x[0,0,i:i+patch_size,j:j+patch_size,k:k+patch_size]
                #print x_in.shape
                x_shared.set_value(x_in[None,:,:,:])
                fm = get_featuremap()
                #print fm.shape
                predictions[idxi,idxj,idxk] = fm[0]

    result = np.concatenate(predictions,axis=0)

    utils.save_pkl(result, outputs_path + '/%s.pkl' % pid)
Example #45
    data_iterator = config().test_data_iterator

    print
    print 'Data'
    print 'n test: %d' % data_iterator.nsamples

    pid2prediction = {}
    for i, (x_test, _, id_test) in enumerate(buffering.buffered_gen_threaded(
            data_iterator.generate())):
        predictions = iter_test(x_test)
        pid = id_test[0]
        print predictions
        pid2prediction[pid] = predictions[1] if predictions.shape[-1] == 2 else predictions[0]
        print i, pid, predictions, pid2label[pid]

    utils.save_pkl(pid2prediction, output_pkl_file)
    print 'Saved validation predictions into pkl', os.path.basename(output_pkl_file)

    test_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label)
    print 'Test loss', test_loss

    utils_lung.write_submission(pid2prediction, output_csv_file)
    print 'Saved predictions into csv'
    loss = evaluate_submission.leaderboard_performance(output_csv_file)
    print loss

elif set == 'valid':
    data_iterator = config().valid_data_iterator

    print
    print 'Data'
Example #46
            n_offsets = val_seq_len - cfg.seq_length
            pred_batch = []
            for ofs in range(n_offsets):
                pred = compute_output(ofs)
                pred_batch.append(pred)
            pred_batch = np.array(pred_batch).swapaxes(0,1)

            msk = m_shared.get_value(borrow=True)
            new_preds_batch = []
            for i in range(len(pred_batch)):
                l = len(msk[i][msk[i]==0.])
                new_preds_batch.append(pred_batch[i][:-l])
            preds.extend(new_preds_batch)
            print "%i%%"%int(np.round((b+1)/float(n_batches)*100.))

            utils.save_pkl(preds, target_path)

    else: 
        preds = utils.load_pkl(target_path)
        assert len(preds) == len(split.test_idxs)

    print "Preparing csv files"
    pred_files = glob(csv_path+"Sample*")
    if len(pred_files) != len(split.test_idxs):
        for i, pred in enumerate(preds):

            pred = np.argmax(pred, axis=-1)

            # window = 10
            # new_pred = pred.copy()
            # for j in range(len(pred)-window):
Example #47
test_data_iterator.transformation_params = config().valid_transformation_params

batch_predictions, batch_ids = [], []
mu_predictions, sigma_predictions = [], []
for xs_batch_test, _, ids_batch in buffering.buffered_gen_threaded(test_data_iterator.generate()):
    for x_shared, x in zip(xs_shared, xs_batch_test):
        x_shared.set_value(x)
    batch_predictions.append(iter_test_det())
    batch_ids.append(ids_batch)
    mu_predictions.append(iter_mu())
    sigma_predictions.append(iter_sigma())

test_avg_patient_predictions = config().get_avg_patient_predictions(batch_predictions, batch_ids, mean='geometric')
pid2mu = utils_heart.get_patient_normparam_prediction(mu_predictions, batch_ids)
pid2sigma = utils_heart.get_patient_normparam_prediction(sigma_predictions, batch_ids)
test_pid2musigma = {}
for pid in pid2mu.iterkeys():
    test_pid2musigma[pid] = {'mu': pid2mu[pid], 'sigma': pid2sigma[pid]}

predictions = {'train': train_avg_patient_predictions,
               'valid': valid_avg_patient_predictions,
               'test': test_avg_patient_predictions}

predictions_mu_std = {'train': train_pid2musigma,
                      'valid': valid_pid2musigma,
                      'test': test_pid2musigma}

utils.save_pkl(predictions, prediction_path)
utils.save_pkl(predictions_mu_std, prediction_mu_std_path)
print ' predictions saved to %s' % prediction_path