Example #1
File: tests.py  Project: alzayats/haven
    def test_hash(self):
        # test whether hashing works for nested dicts
        exp_dict_1 = {
            'model': {
                'name': 'mlp',
                'n_layers': 30
            },
            'dataset': 'mnist',
            'batch_size': 1
        }
        exp_dict_2 = {
            'dataset': 'mnist',
            'batch_size': 1,
            'model': {
                'name': 'mlp',
                'n_layers': 30
            }
        }
        exp_dict_3 = {
            'dataset': 'mnist',
            'batch_size': 1,
            'model': {
                'name': 'mlp'
            }
        }

        assert (hu.hash_dict(exp_dict_1) == hu.hash_dict(exp_dict_2))
        assert (hu.hash_dict(exp_dict_1) != hu.hash_dict(exp_dict_3))
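
The two assertions above hold because `hash_dict` derives its digest from the dictionary's contents rather than its key order. A minimal sketch of such an order-insensitive hash (assuming an MD5-over-sorted-JSON scheme; haven's actual implementation may differ):

import hashlib
import json

def hash_dict_sketch(exp_dict):
    # Sorting the keys makes the serialization, and hence the digest,
    # independent of insertion order (nested dicts are sorted too).
    payload = json.dumps(exp_dict, sort_keys=True)
    return hashlib.md5(payload.encode('utf-8')).hexdigest()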
Example #2
def test_get_score_lists():
    # save a score_list
    savedir_base = ".tmp"
    exp_dict = {
        "model": {
            "name": "mlp",
            "n_layers": 30
        },
        "dataset": "mnist",
        "batch_size": 1
    }
    score_list = [{"epoch": 0, "acc": 0.5}, {"epoch": 0, "acc": 0.9}]

    hu.save_pkl(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"),
        score_list)
    hu.save_json(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"),
        exp_dict)
    # check if score_list can be loaded and viewed in pandas
    exp_list = hu.get_exp_list(savedir_base=savedir_base)

    score_lists = hr.get_score_lists(exp_list, savedir_base=savedir_base)
    assert score_lists[0][0]["acc"] == 0.5
    assert score_lists[0][1]["acc"] == 0.9

    shutil.rmtree(savedir_base)
Example #3
def test_cartesian_product():
    # test whether the cartesian product covers all needed variations
    exp_dict_1 = {"dataset": "mnist", "model": "mlp", "batch_size": 1}
    exp_dict_2 = {"dataset": "mnist", "model": "mlp", "batch_size": 5}
    exp_dict_3 = {"dataset": "cifar10", "model": "mlp", "batch_size": 1}
    exp_dict_4 = {"dataset": "cifar10", "model": "mlp", "batch_size": 5}

    exp_list = [exp_dict_1, exp_dict_2, exp_dict_3, exp_dict_4]
    exp_list_cartesian = hu.cartesian_exp_group({
        "dataset": ["mnist", "cifar10"],
        "model": "mlp",
        "batch_size": [1, 5]
    })

    exp_list_hash = [hu.hash_dict(e) for e in exp_list]
    exp_list_cartesian_hash = [hu.hash_dict(e) for e in exp_list_cartesian]

    # check if the # experiments is correct
    assert len(exp_list_cartesian_hash) == len(exp_list_hash)

    # check that the hashes in the cartesian are all there
    for h in exp_list_hash:
        assert h in exp_list_cartesian_hash

    # check that every hash is unique
    assert len(exp_list_cartesian_hash) == len(
        np.unique(exp_list_cartesian_hash))
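
For intuition, the expansion that `hu.cartesian_exp_group` performs can be sketched with `itertools.product` (an illustrative simplification, not haven's actual code; the function name is mine):

import itertools

def cartesian_exp_group_sketch(exp_config):
    # Wrap scalar values in a one-element list so every key maps to a list of options.
    options = {k: v if isinstance(v, list) else [v]
               for k, v in exp_config.items()}
    keys = list(options)
    # One experiment dict per combination of option values.
    return [dict(zip(keys, combo))
            for combo in itertools.product(*options.values())]

With the config above this yields the four dicts in exp_list (2 datasets × 1 model × 2 batch sizes).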
Example #4
    def test_get_plot(self):
        # save a score_list
        savedir_base = '.tmp'
        exp_dict = {'model':{'name':'mlp', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}
        score_list = [{'epoch': 0, 'acc':0.5}, {'epoch': 1, 'acc':0.9}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict),
                     'score_list.pkl'), score_list)
        hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict),
                     'exp_dict.json'), exp_dict)
        # check if score_list can be loaded and viewed in pandas
        exp_list = hu.get_exp_list(savedir_base=savedir_base)
        
        fig, axis = hr.get_plot(exp_list,
             savedir_base=savedir_base,
             filterby_list=[({'model':{'name':'mlp'}},
                             {'style':{'color':'red'}})],
             x_metric='epoch',
             y_metric='acc')
        # fig, axis = hr.get_plot(exp_list,
        #      savedir_base=savedir_base,
        #      x_metric='epoch',
        #      y_metric='acc',
        #      mode='pretty_plot')
        fig, axis = hr.get_plot(exp_list,
             savedir_base=savedir_base,
             x_metric='epoch',
             y_metric='acc',
             mode='bar')
        fig.savefig(os.path.join('.tmp', 
                        'test.png'))

        shutil.rmtree('.tmp')
Example #5
    def test_zipdir(self):
        # save a score_list
        savedir_base = ".tmp"
        exp_dict = {
            "model": {
                "name": "mlp",
                "n_layers": 30
            },
            "dataset": "mnist",
            "batch_size": 1
        }
        score_list = [{"epoch": 0, "acc": 0.5}, {"epoch": 0, "acc": 0.9}]

        hu.save_pkl(
            os.path.join(savedir_base, hu.hash_dict(exp_dict),
                         "score_list.pkl"), score_list)
        hu.save_json(
            os.path.join(savedir_base, hu.hash_dict(exp_dict),
                         "exp_dict.json"), exp_dict)
        # check if score_list can be loaded and viewed in pandas
        exp_list = hr.get_exp_list(savedir_base=savedir_base)

        score_lists = hr.get_score_lists(exp_list, savedir_base=savedir_base)
        assert score_lists[0][0]["acc"] == 0.5
        assert score_lists[0][1]["acc"] == 0.9
        from haven import haven_dropbox as hd

        hd.zipdir([hu.hash_dict(exp_dict) for exp_dict in exp_list],
                  savedir_base,
                  src_fname=".tmp/results.zip")
        shutil.rmtree(savedir_base)
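
The call signature suggests `zipdir` archives the listed experiment folders under savedir_base into a single zip. A plausible sketch using the standard zipfile module (the real haven_dropbox implementation may differ):

import os
import zipfile

def zipdir_sketch(exp_id_list, savedir_base, src_fname):
    # Archive every file under each experiment directory.
    with zipfile.ZipFile(src_fname, 'w', zipfile.ZIP_DEFLATED) as zf:
        for exp_id in exp_id_list:
            exp_dir = os.path.join(savedir_base, exp_id)
            for root, _, files in os.walk(exp_dir):
                for fname in files:
                    abs_path = os.path.join(root, fname)
                    # Store paths relative to savedir_base inside the archive.
                    zf.write(abs_path, os.path.relpath(abs_path, savedir_base))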
Example #6
    def test_get_score_df(self):
        # save a score_list
        savedir_base = '.tmp'
        exp_dict = {'model':{'name':'mlp', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}
        exp_dict2 = {'model':{'name':'mlp2', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}

        score_list = [{'epoch': 0, 'acc':0.5}, {'epoch': 0, 'acc':0.9}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict),
                     'score_list.pkl'), score_list)
                     
        hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict),
                     'exp_dict.json'), exp_dict)

        hu.save_json(os.path.join(savedir_base, hu.hash_dict(exp_dict2),
                     'exp_dict.json'), exp_dict2)
        # check if score_list can be loaded and viewed in pandas
        exp_list = hu.get_exp_list(savedir_base=savedir_base)
        score_df = hr.get_score_df(exp_list, savedir_base=savedir_base)
        
        assert(np.array(score_df['dataset'])[0].strip("'") == 'mnist')

        shutil.rmtree('.tmp')
Example #7
    def launch_exp_dict(self, exp_dict, savedir, command, job=None):
        """Submit a job job and save job dict and exp_dict."""
        # Check for duplicates
        # if job is not None:
        # assert self._assert_no_duplicates(job)

        fname_exp_dict = os.path.join(savedir, "exp_dict.json")
        hu.save_json(fname_exp_dict, exp_dict)
        assert (hu.hash_dict(
            hu.load_json(fname_exp_dict)) == hu.hash_dict(exp_dict))

        # Define paths
        workdir_job = os.path.join(savedir, "code")

        # Copy the experiment code into the experiment folder
        hu.copy_code(self.workdir + "/", workdir_job, verbose=0)

        # Run command
        job_id = self.submit_job(command, workdir_job, savedir_logs=savedir)

        # Verbose
        if self.verbose:
            print("Job_id: %s command: %s" % (job_id, command))

        job_dict = {"job_id": job_id, "command": command}

        hu.save_json(get_job_fname(savedir), job_dict)

        return job_dict
Example #8
def test_hash():
    # test whether hashing works for nested dicts
    exp_dict_1 = {
        "model": {
            "name": "mlp",
            "n_layers": 30
        },
        "dataset": "mnist",
        "batch_size": 1
    }
    exp_dict_2 = {
        "dataset": "mnist",
        "batch_size": 1,
        "model": {
            "name": "mlp",
            "n_layers": 30
        }
    }
    exp_dict_3 = {
        "dataset": "mnist",
        "batch_size": 1,
        "model": {
            "name": "mlp"
        }
    }

    assert hu.hash_dict(exp_dict_1) == hu.hash_dict(exp_dict_2)
    assert hu.hash_dict(exp_dict_1) != hu.hash_dict(exp_dict_3)
Example #9
File: tests.py  Project: alzayats/haven
    def test_cartesian_product(self):
        # test whether the cartesian product covers all needed variations
        exp_dict_1 = {'dataset': 'mnist', 'model': 'mlp', 'batch_size': 1}
        exp_dict_2 = {'dataset': 'mnist', 'model': 'mlp', 'batch_size': 5}
        exp_dict_3 = {'dataset': 'cifar10', 'model': 'mlp', 'batch_size': 1}
        exp_dict_4 = {'dataset': 'cifar10', 'model': 'mlp', 'batch_size': 5}

        exp_list = [exp_dict_1, exp_dict_2, exp_dict_3, exp_dict_4]
        exp_list_cartesian = hu.cartesian_exp_group({
            'dataset': ['mnist', 'cifar10'],
            'model': 'mlp',
            'batch_size': [1, 5]
        })

        exp_list_hash = [hu.hash_dict(e) for e in exp_list]
        exp_list_cartesian_hash = [hu.hash_dict(e) for e in exp_list_cartesian]

        # check if the # experiments is correct
        assert (len(exp_list_cartesian_hash) == len(exp_list_hash))

        # check that the hashes in the cartesian are all there
        for h in exp_list_hash:
            assert (h in exp_list_cartesian_hash)

        # check that every hash is unique
        assert (len(exp_list_cartesian_hash) == len(
            np.unique(exp_list_cartesian_hash)))
Example #10
File: tests.py  Project: alzayats/haven
    def test_get_score_lists(self):
        # save a score_list
        savedir_base = '.tmp'
        exp_dict = {
            'model': {
                'name': 'mlp',
                'n_layers': 30
            },
            'dataset': 'mnist',
            'batch_size': 1
        }
        score_list = [{'epoch': 0, 'acc': 0.5}, {'epoch': 0, 'acc': 0.9}]

        hu.save_pkl(
            os.path.join(savedir_base, hu.hash_dict(exp_dict),
                         'score_list.pkl'), score_list)
        hu.save_json(
            os.path.join(savedir_base, hu.hash_dict(exp_dict),
                         'exp_dict.json'), exp_dict)
        # check if score_list can be loaded and viewed in pandas
        exp_list = hr.get_exp_list(savedir_base=savedir_base)

        score_lists = hr.get_score_lists(exp_list, savedir_base=savedir_base)
        assert (score_lists[0][0]['acc'] == 0.5)
        assert (score_lists[0][1]['acc'] == 0.9)

        shutil.rmtree(savedir_base)
Example #11
    def test_get_best_exp_dict(self):
        savedir_base = '.tmp'
        exp_dict_1 = {'model':{'name':'mlp', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}
        score_list = [{'epoch': 0, 'acc':0.5}, {'epoch': 1, 'acc':0.9}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_1),
                     'score_list.pkl'), score_list)

        exp_dict_2 = {'model':{'name':'mlp', 'n_layers':35}, 
                    'dataset':'mnist', 'batch_size':1}
        score_list = [{'epoch': 0, 'acc':0.6}, {'epoch': 1, 'acc':1.9}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_2),
                     'score_list.pkl'), score_list)

        best_exp_list = hu.filter_exp_list([exp_dict_1, exp_dict_2], savedir_base=savedir_base,
                            filterby_list=[({'model.name':'mlp'}, 
                                    {'best':{'avg_across':'run',
                                              'metric':'acc', 
                                              'metric_agg':'max'}}
                                        )])
        assert len(best_exp_list) == 1
        assert best_exp_list[0]['model']['n_layers'] == 35

        best_exp_list = hu.filter_exp_list([exp_dict_1, exp_dict_2], savedir_base=savedir_base,
                            filterby_list=[({'model.name':'mlp'}, 
                                    {'best':{'avg_across':'run',
                                              'metric':'acc', 
                                              'metric_agg':'min'}}
                                        )])
        assert best_exp_list[0]['model']['n_layers'] == 30                                
        # exp 2
        exp_dict_2 = {'model':{'name':'mlp2', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1, 'run':0}
        score_list = [{'epoch': 0, 'acc':1.5}, {'epoch': 1, 'acc':1.8}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_2),
                     'score_list.pkl'), score_list)
        # exp 3
        exp_dict_3 = {'model':{'name':'mlp2', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1, 'run':1}
        score_list = [{'epoch': 0, 'acc':1.5}, {'epoch': 1, 'acc':1.3}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_3),
                     'score_list.pkl'), score_list)


        exp_list = [exp_dict_1, exp_dict_2, exp_dict_3]
        best_exp_dict = hu.get_best_exp_dict(exp_list, 
                            savedir_base=savedir_base, metric='acc', 
                            avg_across='run',
                            metric_agg='max',
                            )

        assert(best_exp_dict['model']['name'] == 'mlp2')
Example #12
    def launch_exp_list(self,
                        command,
                        exp_list=None,
                        savedir_base=None,
                        reset=0,
                        in_parallel=True):
        exp_list = exp_list or self.exp_list
        assert '<exp_id>' in command

        submit_dict = {}

        if in_parallel:
            pr = hu.Parallel()

            for exp_dict in exp_list:
                exp_id = hu.hash_dict(exp_dict)

                savedir_base = savedir_base or self.savedir_base
                savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))

                com = command.replace('<exp_id>', exp_id)
                pr.add(self.launch_or_ignore_exp_dict, exp_dict, com, reset,
                       savedir, submit_dict)

            pr.run()
            pr.close()

        else:
            for exp_dict in exp_list:
                exp_id = hu.hash_dict(exp_dict)

                savedir_base = savedir_base or self.savedir_base
                savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))

                com = command.replace('<exp_id>', exp_id)
                self.launch_or_ignore_exp_dict(exp_dict, com, reset, savedir,
                                               submit_dict)

        if len(submit_dict) == 0:
            raise ValueError(
                'The threads had an error, most likely a permission error (see above)'
            )

        for i, (k, v) in enumerate(submit_dict.items()):
            print('***')
            print('Exp %d/%d - %s' % (i + 1, len(submit_dict), v['message']))
            print('exp_id: %s' % hu.hash_dict(v['exp_dict']))
            pprint.pprint(v['exp_dict'])
            print()

        print("%d experiments submitted." % len(exp_list))
        if len(submit_dict) > 0:
            assert len(submit_dict) == len(
                exp_list), 'considered exps does not match expected exps'
        return submit_dict
Example #13
def test_get_plot():
    # save a score_list
    savedir_base = ".tmp"
    exp_dict = {
        "model": {
            "name": "mlp",
            "n_layers": 30
        },
        "dataset": "mnist",
        "batch_size": 1
    }
    score_list = [{"epoch": 0, "acc": 0.5}, {"epoch": 1, "acc": 0.9}]

    hu.save_pkl(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"),
        score_list)
    hu.save_json(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"),
        exp_dict)
    # check if score_list can be loaded and viewed in pandas
    exp_list = hu.get_exp_list(savedir_base=savedir_base)

    fig, axis = hr.get_plot(
        exp_list,
        savedir_base=savedir_base,
        filterby_list=[({
            "model": {
                "name": "mlp"
            }
        }, {
            "style": {
                "color": "red"
            }
        })],
        x_metric="epoch",
        y_metric="acc",
    )
    # fig, axis = hr.get_plot(exp_list,
    #      savedir_base=savedir_base,
    #      x_metric='epoch',
    #      y_metric='acc',
    #      mode='pretty_plot')
    fig, axis = hr.get_plot(exp_list,
                            savedir_base=savedir_base,
                            x_metric="epoch",
                            y_metric="acc",
                            mode="bar")
    fig.savefig(os.path.join(".tmp", "test.png"))

    shutil.rmtree(".tmp")
Example #14
def trainval(exp_dict):
    pprint.pprint(exp_dict)

    savedir_base = os.path.join('tmp', hu.hash_dict(exp_dict))
    os.makedirs(savedir_base, exist_ok=True)
    # -- get scenes
    source_scene = scenes.get_scene(exp_dict['source_scene'])
    target_scene = scenes.get_scene(exp_dict['target_scene'])

    # -- get model
    model = models.get_model(exp_dict['model'], source_scene, exp_dict)

    # -- train for E iterations
    score_list = []
    for e in range(500):
        # update parameters and get new score_dict
        score_dict = model.train_on_batch(target_scene)
        score_dict["epoch"] = e
        score_dict["step_size"] = model.opt.state['step_size']

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Print 
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")

        # Visualize
        if e % 50 == 0:
            model.vis_on_batch(target_scene, 
                    fname=os.path.join(savedir_base, 
                            'output_%d.png' % e))

    save_gif(src_path=os.path.join(savedir_base, '*.png'), 
             tgt_fname=os.path.join(savedir_base, 'animation.gif'))
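
`save_gif` is not defined in this snippet; under the assumption that it stitches the saved PNGs into an animated GIF, a sketch with Pillow could look like this (frame ordering and timing are illustrative):

import glob
from PIL import Image

def save_gif_sketch(src_path, tgt_fname, duration=200):
    # Collect frames matching the glob pattern; note that lexical sorting
    # may need a numeric key for names like 'output_100.png'.
    frames = [Image.open(f) for f in sorted(glob.glob(src_path))]
    if frames:
        # Write an animated GIF that loops forever.
        frames[0].save(tgt_fname, save_all=True, append_images=frames[1:],
                       duration=duration, loop=0)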
Example #15
    def kill_jobs(self):
        add_job_utils()
        import haven_jobs_utils as hju

        hu.check_duplicates(self.exp_list)

        pr = hu.Parallel()
        submit_dict = {}

        for exp_dict in self.exp_list:
            exp_id = hu.hash_dict(exp_dict)
            savedir = os.path.join(self.savedir_base, exp_id)
            fname = hju.get_job_fname(savedir)

            if os.path.exists(fname):
                job_id = hu.load_json(fname)['job_id']
                pr.add(hju.kill_job, self.api, job_id)
                submit_dict[exp_id] = 'KILLED'
            else:
                submit_dict[exp_id] = 'NON-EXISTENT'

        pr.run()
        pr.close()
        pprint.pprint(submit_dict)
        print("%d/%d experiments killed." % (len([ s for s in submit_dict.values() if 'KILLED' in s]),
                                                len(submit_dict)))
        return submit_dict
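Example #16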
    def __getitem__(self, index):
        meta = self.meta_list[index]
        img, mask = load_minibatch_h5(self.path, [meta['index']])
        points = get_point_list(mask)
        char_id_list = [
            i + 1 for i, s in enumerate(meta['symbols'])
            if s['char'] in ['a']
        ]
        # char_id_list = [i+1 for i, s in enumerate(meta['symbols'])]
        mask_out = np.zeros(mask.shape)

        for char_id in char_id_list:
            mask_out[mask == char_id] = 1

        # hu.save_image('tmp.png', mask_out)
        points = points * mask_out

        assert (abs(len(char_id_list) - points.sum()) <= 2)
        img_float = self.img_transform(img[0])
        meta['hash'] = hu.hash_dict({'id': meta['index']})
        meta['shape'] = mask_out.squeeze().shape
        meta['split'] = self.split
        return {
            'images': img_float,
            'points': torch.FloatTensor(points),
            'masks': torch.LongTensor(mask_out),
            'meta': meta
        }
Example #17
    def __getitem__(self, index):
        name = self.img_names[index]

        # LOAD IMG, POINT, and ROI
        image = imread(os.path.join(self.path, name + ".jpg"))
        points = imread(os.path.join(self.path,
                                     name + "dots.png"))[:, :, :1].clip(0, 1)
        roi = loadmat(os.path.join(self.path,
                                   name + "mask.mat"))["BW"][:, :, np.newaxis]

        # LOAD IMG AND POINT
        image = image * roi
        image_original = hu.shrink2roi(image, roi)
        points = hu.shrink2roi(points, roi).astype("uint8")

        counts = torch.LongTensor(np.array([int(points.sum())]))

        # collection = list(map(FT.to_pil_image, [image, points]))
        image, points = apply_transform(image_original, points)

        return {
            "images": image,
            # 'images_original':image_original,
            "points": points.squeeze()[None],
            "counts": counts,
            'meta': {
                "index": index,
                'split': self.split,
                'hash': hu.hash_dict({'id': name})
            }
        }
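Example #18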
    def __getitem__(self, i):
        out = self.img_list[i]
        img_name, tgt_name, lung_name = out['img'], out['tgt'], out['lung']

        # read image
        img_dcm = pydicom.dcmread(os.path.join(self.img_path, img_name))
        image = img_dcm.pixel_array.astype('float')

        # read infection mask
        tgt_mask = np.array(Image.open(os.path.join(self.tgt_path, tgt_name)).transpose(Image.FLIP_LEFT_RIGHT).rotate(90))
        
        # read lung mask
        lung_mask = np.array(Image.open(os.path.join(self.lung_path, 
                             lung_name)).transpose(Image.FLIP_LEFT_RIGHT))
        mask = np.zeros(lung_mask.shape)
        # mask[lung_mask== 255] = 1
        # mask[tgt_mask== 127] = 2
        # mask[tgt_mask== 255] = 3
        if self.n_classes == 2:
            mask[tgt_mask != 0] = 1
        elif self.n_classes == 3:
            mask[tgt_mask == 127] = 1
            mask[tgt_mask == 255] = 2

        # assert that these are the only classes
        assert len(np.setdiff1d(np.unique(tgt_mask), [0, 127, 255])) == 0
        assert len(np.setdiff1d(np.unique(lung_mask), [0, 255])) == 0

        # image, mask = transformers.apply_transform(self.split, image=image, label=mask, 
        #                                transform_name=self.exp_dict['dataset']['transform'], 
        #                                exp_dict=self.exp_dict)
        img_uint8 = ((image/4095)*255).astype('uint8')
        image = self.img_transform(Image.fromarray(img_uint8).convert('RGB'))
        mask = self.gt_transform(Image.fromarray((mask).astype('uint8')))
        mask = torch.LongTensor(np.array(mask))

        if self.n_classes == 2:
            assert (len(np.setdiff1d(np.unique(mask), [0, 1])) == 0)
        if self.n_classes == 3:
            assert (len(np.setdiff1d(np.unique(mask), [0, 1, 2])) == 0)

        from src.modules.lcfcn import lcfcn_loss
        points = lcfcn_loss.get_points_from_mask(mask.numpy().squeeze(), bg_points=-1)
        # if (points == 255).mean() == 1:
        #     points[:] = 0
        return {'images': image,
                'masks': mask.long()[None],
                'points': torch.LongTensor(points),
                'meta': {'shape': mask.squeeze().shape,
                         'index': i,
                         'hash': hu.hash_dict({'id': os.path.join(self.img_path, img_name)}),
                         'name': img_name,
                         'slice_thickness': img_dcm.SliceThickness,
                         'pixel_spacing': str(img_dcm.PixelSpacing),
                         'img_name': img_name,
                         'tgt_name': tgt_name,
                         'image_id': i,
                         'split': self.split}}
Example #19
    def __getitem__(self, index):
        # index = 0
        img_path = self.dataset.images[index]
        name = os.path.split(img_path)[-1].split('.')[0]

        img_pil = Image.open(img_path).convert("RGB")
        W, H = img_pil.size
        points_list = self.point_dict[name]
        points_mask = np.zeros((H, W))
        for p in points_list:
            if p['y'] >= H or p['x'] >= W:
                continue
            points_mask[int(p['y']), int(p['x'])] = p['cls']
        
        images = torchvision.transforms.ToTensor()(np.array(img_pil))
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        images = transforms.Normalize(mean=mean, std=std)(images)
        mask_path = self.dataset.masks[index]

        if '.mat' in mask_path:
            mask_pil = Image.fromarray(hu.load_mat(mask_path)['GTcls'][0]['Segmentation'][0])
        else:
            mask_pil = Image.open(mask_path)
            
        masks = torch.as_tensor(np.array(mask_pil))

        points_mask[points_mask == 0] = 255
        y_list, x_list = np.where(masks==0)
        if len(y_list):
            for i in range(len(points_list)):
                yi, xi = datasets.get_random(y_list, x_list, seed=i+1)
                points_list += [{'cls':0, 'x':xi, 'y':yi}]
                points_mask[yi, xi] = 0

        # hu.save_image(fname='tmp.png', img=torch.as_tensor(np.array(img_pil))/255., 
        #             points=(points_mask!=255).astype('uint8'))

        batch = {"images": images,
        "img_pil": img_pil,
                'points':torch.as_tensor(points_mask),
                'point_list':points_list,
                # 'inst':inst,
                #  'flipped':flipped,
                 "masks": masks,
                #  "original":inv_transform(images),
                 "meta": {"index": index,
                        'hash':hu.hash_dict({'id':index, 'split':self.split}),
                          "name": self.dataset.images[index],
                          "size": images.shape[-2:],
                          "image_id": index,
                          "split": self.split}}

        return batch
Example #20
    def test_get_best_exp_dict(self):
        savedir_base = '.tmp'
        exp_dict_1 = {'model':{'name':'mlp', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}
        score_list = [{'epoch': 0, 'acc':0.5}, {'epoch': 1, 'acc':0.9}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_1),
                     'score_list.pkl'), score_list)

        exp_dict_2 = {'model':{'name':'mlp2', 'n_layers':30}, 
                    'dataset':'mnist', 'batch_size':1}
        score_list = [{'epoch': 0, 'acc':0.5}, {'epoch': 1, 'acc':1.2}]

        hu.save_pkl(os.path.join(savedir_base, hu.hash_dict(exp_dict_2),
                     'score_list.pkl'), score_list)

        exp_list = [exp_dict_1, exp_dict_2]
        best_exp_dict = hr.get_best_exp_dict(exp_list, savedir_base=savedir_base, metric='acc', min_or_max='max')

        assert(best_exp_dict['model']['name'] == 'mlp2')
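Example #21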
    def __getitem__(self, index):
        image = rgb_loader(self.images[index])
        gt = binary_loader(self.gts[index])

        image = self.img_transform(image)
        gt = self.gt_transform(gt)
        tgt_mask = np.array(gt)

        assert (len(np.setdiff1d(np.unique(tgt_mask), [0, 127, 255])) == 0)

        mask = np.zeros(tgt_mask.shape)
        if self.n_classes == 2:
            mask[tgt_mask != 0] = 1

        elif self.n_classes == 3:
            mask[tgt_mask == 127] = 1
            mask[tgt_mask == 255] = 2
        mask = torch.LongTensor(mask)
        # gt = self.gt_transform(gt)

        # cc = K.CenterCrop((384, 385))
        # image = cc(image)
        # mask = cc(mask[None].float()).long()
        from src.modules.lcfcn import lcfcn_loss
        points = lcfcn_loss.get_points_from_mask(mask.numpy().squeeze(),
                                                 bg_points=-1)
        points = torch.LongTensor(points)
        # hu.save_image('tmp.png', hu.denormalize(image, 'rgb'), points=points, radius=2)
        # hu.save_image('tmp.png', hu.denormalize(image, 'rgb'), mask=gt.numpy(), radius=2)
        if self.n_classes == 2:
            assert (len(np.setdiff1d(np.unique(mask), [0, 1])) == 0)
        if self.n_classes == 3:
            assert (len(np.setdiff1d(np.unique(mask), [0, 1, 2])) == 0)

        # points = cc(torch.LongTensor(points)[None].float()).long()[0]

        batch = {
            'images': image,
            'masks': mask[None],
            'points': points,
            'meta': {
                'name': index,
                'hash': hu.hash_dict({'id': self.images[index]}),
                # 'hash':self.images[index],
                'shape': mask.squeeze().shape,
                'index': index,
                'split': self.split,
                # 'size':self.size
            }
        }

        # return image, gt, name, np.array(F.interpolate(image, gt.size, mode='bilinear'))
        return batch
Example #22
def test_get_score_df():
    # save a score_list
    savedir_base = ".tmp"
    exp_dict = {
        "model": {
            "name": "mlp",
            "n_layers": 30
        },
        "dataset": "mnist",
        "batch_size": 1
    }
    exp_dict2 = {
        "model": {
            "name": "mlp2",
            "n_layers": 30
        },
        "dataset": "mnist",
        "batch_size": 1
    }

    score_list = [{"epoch": 0, "acc": 0.5}, {"epoch": 0, "acc": 0.9}]

    hu.save_pkl(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "score_list.pkl"),
        score_list)

    hu.save_json(
        os.path.join(savedir_base, hu.hash_dict(exp_dict), "exp_dict.json"),
        exp_dict)

    hu.save_json(
        os.path.join(savedir_base, hu.hash_dict(exp_dict2), "exp_dict.json"),
        exp_dict2)
    # check if score_list can be loaded and viewed in pandas
    exp_list = hu.get_exp_list(savedir_base=savedir_base)
    score_df = hr.get_score_df(exp_list, savedir_base=savedir_base)

    assert np.array(score_df["dataset"])[0].strip("'") == "mnist"

    shutil.rmtree(".tmp")
Example #23
    def __getitem__loc(self, index):
        name = self.img_names[index]
        image_pil = Image.open(self.path + "/images/" + name + ".jpg")
        W, H = image_pil.size
        original = copy.deepcopy(image_pil)
        image = self.img_transform(image_pil)
        h, w = image.shape[-2:]
        # get points
        points = Image.open(self.path + "/masks/" + name +
                            ".png")  #[..., np.newaxis]
        # points = self.gt_transform(points)
        points = np.array(points).clip(0, 1)
        points = points.squeeze()

        points_new = np.zeros((h, w))
        y_list, x_list = np.where(points)
        point_list = []
        for y, x in zip(y_list, x_list):
            y_new, x_new = int(y * (h / H)), int(x * (w / W))
            points_new[y_new, x_new] = 1
            point_list += [{'y': y, 'x': x, 'cls': 1}]

        points = points_new

        counts = torch.LongTensor(np.array([int(points.sum())]))
        if (points == -1).all():
            pass
        else:
            assert int(np.count_nonzero(points)) == counts[0]
        assert counts.item() == self.counts[index]

        batch = {
            "images": image,
            'original': original,
            'masks': torch.as_tensor(points).long(),
            "labels": float(self.labels[index] > 0),
            "counts": float(self.counts[index]),
            'size': image_pil.size,
            'point_list': point_list,
            "points": torch.FloatTensor(np.array(points)),
            "meta": {
                "index": index,
                'name': self.images[index],
                'hash': hu.hash_dict({'id': self.images[index]}),
                "image_id": index,
                'habitat': self.habitats[index],
                'size': self.size,
                "split": self.split
            }
        }

        return batch
Example #24
    def __getitem__(self, index):
        if self.count_mode:
            return self.__getitem__loc(index)
        # Segmentation
        image = rgb_loader(os.path.join(self.datadir, "Segmentation/images", self.images[index]+'.jpg'))
        gt = binary_loader(os.path.join(self.datadir, "Segmentation/masks", self.gts[index]+'.png'))
        original = copy.deepcopy(image)

        image = self.img_transform(image)
        gt = self.gt_transform(gt)
        gt = np.array(gt)
        gt[gt==255] = 1

        img_size = (gt.shape[0], gt.shape[1])

        # Classification
        # image_other = rgb_loader(os.path.join(self.datadir, "Classification", self.images_other[index] + '.jpg'))
        # image_other = self.img_transform(image_other)
        
        points = lcfcn_loss.get_points_from_mask(gt, bg_points=-1)
        # hu.save_image('tmp.png', hu.denormalize(image, 'rgb'), points=points, radius=2)
        # hu.save_image('tmp.png', hu.denormalize(image, 'rgb'), mask=gt.numpy(), radius=2)
        uniques = np.unique(points)
        point_list = []
        for u in uniques:
            if u == 255:
                continue
            y_list, x_list = np.where(points == u)
            for y, x in zip(y_list, x_list):
                point_list += [{'y': y, 'x':x, 'cls': int(u)}]

        batch = {'images': image,
                #  'image_other': image_other,
                 'original':original,
                 'masks': torch.as_tensor(gt).long(),
                 'points': torch.LongTensor(points),
                 'label': torch.from_numpy(np.array([self.labels[index]])),
                #  "labels_other": float(self.labels_other[index] > 0),
                 'size': img_size,

                 'point_list':point_list,

                 'meta': {'name': self.images[index],
                          'hash': hu.hash_dict({'id': self.images[index]}),
                          # 'hash':self.images[index],
                          'habitat':self.habitats[index],
                          'shape': gt.squeeze().shape,
                          'index': index,
                          'split': self.split,
                          'size': self.size}}

        return batch
Example #25
    def test_checkpoint(self):
        savedir_base = '.results'
        # create exp folder
        exp_dict = {'model':{'name':'mlp', 'n_layers':30}, 'dataset':'mnist', 'batch_size':1}
        savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))
        hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
        hu.torch_save(os.path.join(savedir, "model.pth"), torch.zeros(10))
        hu.torch_load(os.path.join(savedir, "model.pth"))
        assert(os.path.exists(savedir))

        # delete exp folder
        hc.delete_experiment(savedir)
        assert(not os.path.exists(savedir))

        # check backup folder
        os.rmdir(savedir_base)
Example #26
def get_existing_slurm_job_commands(exp_list, savedir_base):
    existing_job_commands = []
    for exp_dict in exp_list:
        exp_id = hu.hash_dict(exp_dict)
        savedir = os.path.join(savedir_base, exp_id)
        file_name = os.path.join(savedir, "job_dict.json")
        if not os.path.exists(file_name):
            continue
        job_dict = hu.load_json(file_name)
        job_id = job_dict["job_id"]
        job_status = hu.subprocess_call(
            "scontrol show job %s" %
            job_id).split("JobState=")[1].split(" ")[0]
        if job_status == "RUNNING" or job_status == "PENDING":
            existing_job_commands += [job_dict["command"]]

    return existing_job_commands
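
`hu.subprocess_call` presumably wraps the standard library; a minimal sketch consistent with how its return value is parsed above (string splitting on the command's stdout):

import subprocess

def subprocess_call_sketch(cmd):
    # Run a shell command and return its stdout decoded as text.
    return subprocess.check_output(cmd, shell=True).decode('utf-8')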
Example #27
    def _submit_job(self, exp_dict, command, reset, submit_dict={}):
        """Submit one job.

        It checks if the experiment exists and manages the special cases, e.g.,
        new experiment, reset, failed, job already running, or completed.
        """
        add_job_utils()
        import haven_jobs_utils as hju

        # Define paths
        savedir = os.path.join(self.savedir_base, hu.hash_dict(exp_dict))
        fname = hju.get_job_fname(savedir)
        
        if not os.path.exists(fname):
            # Check if the job already exists
            job_dict = self.launch_job(exp_dict, savedir, command, job=None)
            job_id = job_dict['job_id']
            message = "SUBMITTED: Launching"

        elif reset:
            # Check if the job already exists
            job_id = hu.load_json(fname).get("job_id")
            hju.kill_job(self.api, job_id)
            hc.delete_and_backup_experiment(savedir)

            job_dict = self.launch_job(exp_dict, savedir, command, job=None)
            job_id = job_dict['job_id']
            message = "SUBMITTED: Resetting"

        else:
            job_id = hu.load_json(fname).get("job_id")
            job = hju.get_job(self.api, job_id)

            if job.alive or job.state == 'SUCCEEDED':
                # If the job is alive, do nothing
                message = 'IGNORED: Job %s' % job.state
                
            elif job.state in ["FAILED", "CANCELLED"]:
                message = "SUBMITTED: Retrying %s Job" % job.state
                job_dict = self.launch_job(exp_dict, savedir, command, job=job)
                job_id = job_dict['job_id']
            else:
                # This shouldn't happen
                raise ValueError('unexpected job state: %s' % job.state)
        
        submit_dict[job_id] = message
Example #28
    def submit_jobs(self, job_command, reset=0):
        hu.check_duplicates(self.exp_list)

        pr = hu.Parallel()
        submit_dict = {}

        for exp_dict in self.exp_list:
            exp_id = hu.hash_dict(exp_dict)
            
            command = job_command.replace('<exp_id>', exp_id)
            pr.add(self._submit_job, exp_dict, command, reset, submit_dict)

        pr.run()
        pr.close()
        pprint.pprint(submit_dict)
        print("%d/%d experiments submitted." % (len([ s for s in submit_dict.values() if 'SUBMITTED' in s]),
                                                len(submit_dict)))
        return submit_dict
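
The `pr.add` / `pr.run` / `pr.close` pattern used here and in Examples #12 and #15 suggests a thin thread wrapper. A minimal sketch matching that interface (haven's hu.Parallel may well differ, e.g., in error handling; the class name is mine):

import threading

class ParallelSketch:
    """Run queued calls concurrently, one thread per call."""

    def __init__(self):
        self.threads = []

    def add(self, fn, *args):
        # Queue a call without starting it yet.
        self.threads.append(threading.Thread(target=fn, args=args))

    def run(self):
        # Start all queued calls.
        for t in self.threads:
            t.start()

    def close(self):
        # Wait for every call to finish.
        for t in self.threads:
            t.join()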
Example #29
def save_example_results(savedir_base="results"):
    import os
    import pandas
    import requests
    import io
    import matplotlib.pyplot as plt

    from .. import haven_results as hr
    from .. import haven_utils as hu
    from PIL import Image

    # create hyperparameters
    exp_list = [{
        "dataset": "mnist",
        "model": "mlp",
        "lr": lr
    } for lr in [1e-1, 1e-2, 1e-3]]

    for i, exp_dict in enumerate(exp_list):
        # get hash for experiment
        exp_id = hu.hash_dict(exp_dict)

        # add scores for loss, and accuracy
        score_list = []
        for e in range(1, 10):
            score_list += [{
                "epoch": e,
                "loss": 1 - e * exp_dict["lr"] * 0.9,
                "acc": e * exp_dict["lr"] * 0.1
            }]
        # save scores and images
        hu.save_json(os.path.join(savedir_base, exp_id, "exp_dict.json"),
                     exp_dict)
        hu.save_pkl(os.path.join(savedir_base, exp_id, "score_list.pkl"),
                    score_list)

        url = "https://raw.githubusercontent.com/haven-ai/haven-ai/master/haven/haven_examples/data/%d.png" % (
            i + 1)
        response = requests.get(url).content
        img = plt.imread(io.BytesIO(response), format="JPG")
        hu.save_image(os.path.join(savedir_base, exp_id, "images/1.png"),
                      img[:, :, :3])
Example #30
    def test_get_job_stats_logs_errors(self):
        # return
        exp_list = [{
            'model': {
                'name': 'mlp',
                'n_layers': 30
            },
            'dataset': 'mnist',
            'batch_size': 1
        }]
        savedir_base = '/mnt/datasets/public/issam/tmp'
        job_config = {
            'volume': ['/mnt:/mnt'],
            'image': 'images.borgy.elementai.net/issam.laradji/main',
            'bid': '1',
            'restartable': '1',
            'gpu': '1',
            'mem': '20',
            'cpu': '2',
        }
        run_command = ('python example.py -ei <exp_id> -sb %s' %
                       (savedir_base))

        hjb.run_exp_list_jobs(exp_list,
                              savedir_base=savedir_base,
                              workdir=os.path.dirname(
                                  os.path.realpath(__file__)),
                              run_command=run_command,
                              job_config=job_config,
                              force_run=True,
                              wait_seconds=0)
        assert (os.path.exists(
            os.path.join(savedir_base, hu.hash_dict(exp_list[0]),
                         'borgy_dict.json')))
        jm = hjb.JobManager(exp_list=exp_list, savedir_base=savedir_base)
        jm_summary_list = jm.get_summary()
        rm = hr.ResultManager(exp_list=exp_list, savedir_base=savedir_base)
        rm_summary_list = rm.get_job_summary()
        assert (rm_summary_list['table'].equals(jm_summary_list['table']))

        jm.kill_jobs()
        assert ('CANCELLED' in jm.get_summary()['status'][0])