예제 #1
0
def get_probability_and_backpointer(transition_probability,
                                    emission_probability,
                                    old_prev_states,
                                    current_word, prev_prob):
    """One Viterbi step: for each candidate state of ``current_word``, find
    the best previous state and the resulting path probability.

    Parameters:
        transition_probability: nested dict, transition_probability[prev][curr]
            is P(curr | prev); its keys double as the full state inventory.
        emission_probability: nested dict keyed by word then state; also used
            to list a word's candidate states (unseen words fall back to all
            states).
        old_prev_states: iterable of states reachable at the previous position.
        current_word: the word being decoded at this position.
        prev_prob: dict mapping prev_state -> {'prob': path probability so far}.

    Returns:
        dict mapping state -> {'prob': best path probability,
        'parent': back-pointer to the best previous state}.  States whose best
        probability is zero are omitted.
    """
    decoded = {}
    # Unseen words fall back to every known state as a candidate.
    for curr_state in utils.get_or_default(emission_probability, [current_word], list(transition_probability.keys())):
        max_tag_prob = 0
        back_pointer = ""

        for prev_state in old_prev_states:
            # transition * emission * best probability of reaching prev_state
            curr_state_prob = utils.get_or_default(transition_probability, [prev_state, curr_state], 0) \
                              * utils.get_or_default(emission_probability, [current_word, curr_state], 1) \
                              * utils.get_or_default(prev_prob, [prev_state, 'prob'], 0)

            if curr_state_prob > max_tag_prob:
                max_tag_prob = curr_state_prob
                back_pointer = prev_state

        # Only keep states that are actually reachable (probability > 0).
        # BUG FIX: removed a leftover debug print inside the inner loop and a
        # dead `max_tag_prob < 0` branch -- the probability is a product of
        # non-negative factors and can never go negative.
        if max_tag_prob != 0:
            decoded[curr_state] = {'prob': max_tag_prob, 'parent': back_pointer}

    return decoded
예제 #2
0
File: linear.py  Project: xnlp/RNNsearch
 def __init__(self, **kwargs):
     """Configuration for a linear layer; unrecognised keys fall back to
     fixed defaults."""
     fetch = lambda key, default: get_or_default(kwargs, key, default)
     self.dtype = fetch("dtype", theano.config.floatX)
     self.scope = fetch("scope", "linear")
     self.concat = fetch("concat", False)
     self.multibias = fetch("multibias", False)
     # Weight/bias initialisation is fixed here, not taken from kwargs.
     self.bias = option(use=True, initializer=zeros_initializer)
     self.weight = option(output_major=False,
                          initializer=uniform_initializer)
예제 #3
0
 def __init__(self, **kwargs):
     """Top-level RNNsearch model configuration assembled from sub-configs."""
     self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
     self.scope = get_or_default(kwargs, "scope", "rnnsearch")
     # One embedding table per language side.
     for side in ("source", "target"):
         name = side + "_embedding"
         setattr(self, name,
                 embedding_config(dtype=self.dtype, scope=name))
     self.encoder = encoder_config(dtype=self.dtype)
     self.decoder = decoder_config(dtype=self.dtype)
예제 #4
0
 def __init__(self, **kwargs):
     """Configuration for a feed-forward layer."""
     defaults = {
         "dtype": theano.config.floatX,
         "scope": "feedforward",
         "activation": theano.tensor.nnet.sigmoid,
         "concat": False,
         "multibias": False,
     }
     # Take each setting from kwargs, falling back to the table above.
     for key, fallback in defaults.items():
         setattr(self, key, get_or_default(kwargs, key, fallback))
     self.bias = option(use=True, initializer=zeros_initializer)
     self.weight = option(output_major=False,
                          initializer=uniform_initializer)
예제 #5
0
    def __init__(self, cfg):
        """HuBMAP segmentation training module.

        Builds an SMP Unet from ``cfg['model']`` and random/deterministic
        tile datasets from ``cfg['train_params']`` plus ``CONFIG()`` defaults.

        Raises:
            Exception: if ``cfg['model']['type']`` is not 'Unet'.
        """
        super().__init__()
        self.cfg = cfg
        trn_params = cfg['train_params']
        self.fold = get_or_default(trn_params, 'fold', 0)
        self.batch_size = get_or_default(trn_params, 'batch_size', 16)
        self.num_workers = get_or_default(trn_params, 'num_workers', 2)
        self.train_path = get_or_default(trn_params, 'train_path', 'input/crops256/train/')
        self.mask_path = get_or_default(trn_params, 'masks_path', 'input/crops256/masks/')

        data_path = Path(get_or_default(trn_params, 'data_path', '../input/hubmap-kidney-segmentation'))
        data_path_zarr = Path(get_or_default(trn_params, 'data_path_zarr', '../input/hubmap-zarr/train_scale2'))
        mask_preproc_dir = get_or_default(trn_params, 'mask_preproc_dir',
                                          '/kaggle/input/hubmap-labels-pdf-0-5-0-25-0-01/masks_scale2')

        backbone = cfg['model']['backbone']
        encoder_weights = get_or_default(cfg['model'], 'weights', 'imagenet')

        if cfg['model']['type'] == 'Unet':
            self.model = smp.Unet(encoder_name=backbone, classes=1, encoder_weights=encoder_weights)
        else:
            # BUG FIX: the message previously read cfg['model']['name'], a key
            # not used anywhere else here -- an unsupported type would raise a
            # confusing KeyError instead of this Exception.
            raise Exception(cfg['model']['type'] + ' not supported')
        self.crit = symmetric_lovasz

        # NOTE(review): df_train/df_info are loaded but unused in this scope;
        # kept because read_csv also validates the data_path -- confirm intent.
        df_train = pd.read_csv(data_path / 'train.csv')
        df_info = pd.read_csv(data_path / 'HuBMAP-20-dataset_information.csv')

        # Zarr stores are directories; skip hidden entries.
        files = [x for x in data_path_zarr.iterdir() if x.is_dir() if not x.name.startswith('.')]
        label_fn = lambda o: o

        # NOTE(review): re-binds `cfg`, shadowing the constructor argument for
        # the rest of __init__; tile/augmentation defaults come from CONFIG().
        cfg = CONFIG()

        aug = alb.Compose([
            alb.OneOf([
                alb.HueSaturationValue(10, 15, 10),
                alb.CLAHE(clip_limit=2),
                alb.RandomBrightnessContrast(),
            ], p=0.3),
            alb.Normalize(p=1, std=[0.15167958, 0.23584107, 0.13146145],
                          mean=[0.65459856, 0.48386562, 0.69428385])])

        ds_kwargs = {
            'tile_shape': cfg.tile_shape,
            'padding': cfg.padding,
            'scale': cfg.scale,
            'n_jobs': cfg.n_jobs,
            'preproc_dir': mask_preproc_dir,
            'val_length': cfg.val_length,
            'sample_mult': cfg.sample_mult,
            'loss_weights': False,
            'zoom_sigma': cfg.zoom_sigma,
            'flip': cfg.flip,
            'max_rotation': cfg.max_rotation,
            'deformation_grid_size': cfg.deformation_grid_size,
            'deformation_magnitude': cfg.deformation_magnitude,
            'albumentations_tfms': aug
        }

        self.train_ds = RandomTileDataset(files, label_fn=label_fn, **ds_kwargs)
        self.valid_ds = TileDataset(files, label_fn=label_fn, **ds_kwargs, is_zarr=True)
예제 #6
0
 def __init__(self, **kwargs):
     """Configuration for a GRU cell and its gate sub-configurations."""
     self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
     self.scope = get_or_default(kwargs, "scope", "gru")
     self.concat = get_or_default(kwargs, "concat", False)
     self.activation = get_or_default(kwargs, "activation",
                                      theano.tensor.tanh)
     # One feed-forward config per gate, plus a linear candidate transform.
     self.gates = feedforward_config(dtype=self.dtype, scope="gates")
     for gate_name in ("reset_gate", "update_gate"):
         setattr(self, gate_name,
                 feedforward_config(dtype=self.dtype, scope=gate_name))
     self.candidate = linear_config(dtype=self.dtype, scope="candidate")
예제 #7
0
File: distil_crits.py  Project: dodler/kgl
def get_crit(trn_params):
    """Build the distillation criterion selected by ``trn_params['crit']``.

    Supported values: 'HardDarkRank' (default) and 'TopnMSELoss'.

    Raises:
        Exception: for any other criterion name.
    """
    crit_name = get_or_default(trn_params, 'crit', 'HardDarkRank')

    if crit_name == 'HardDarkRank':
        dark_alpha = float(get_or_default(trn_params, 'dark_alpha', 2))
        # BUG FIX: beta was read from the 'dark_alpha' key (copy-paste), so a
        # configured 'dark_beta' value was silently ignored.
        dark_beta = float(get_or_default(trn_params, 'dark_beta', 3))
        dark_crit = HardDarkRank(alpha=dark_alpha, beta=dark_beta)
        return dark_crit
    elif crit_name == 'TopnMSELoss':
        # NOTE(review): k is a count but is converted with float() -- confirm
        # TopnMSELoss accepts a non-integer k before changing this.
        topk = float(get_or_default(trn_params, 'topk', 5))
        return TopnMSELoss(k=topk)
    else:
        raise Exception(' crit {} is not supported'.format(crit_name))
예제 #8
0
File: main.py  Project: dodler/kgl
    def __init__(self, cfg, fold):
        """EfficientDet training module for a single CV fold."""
        super().__init__()
        # NOTE(review): the env var read is "GLOBAL_RANK" but the local is
        # named LOCAL_RANK -- confirm which rank should offset the seed.
        LOCAL_RANK = int(os.environ.get("GLOBAL_RANK", 0))
        seed_everything(42 + LOCAL_RANK)

        self.cfg = cfg
        self.model = get_train_efficientdet()
        self.fold = fold
        self.df = pd.read_csv('train_folds_only_pos.csv')

        params = cfg['train_params']
        self.img_size = get_or_default(params, 'img_size', 512)
        self.batch_size = get_or_default(params, 'batch_size', 16)
        self.num_workers = get_or_default(params, 'num_workers', 4)

        print('using fold', self.fold)
예제 #9
0
 def __init__(self, **kwargs):
     """Configuration for the attention decoder and its sub-modules."""
     self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
     self.scope = get_or_default(kwargs, "scope", "decoder")
     self.init_transform = feedforward_config(dtype=self.dtype,
                                              scope="init_transform",
                                              activation=theano.tensor.tanh)
     self.annotation_transform = linear_config(dtype=self.dtype,
                                               scope="annotation_transform")
     self.state_transform = linear_config(dtype=self.dtype,
                                          scope="state_transform")
     self.context_transform = linear_config(dtype=self.dtype,
                                            scope="context_transform")
     self.rnn = gru_config(dtype=self.dtype, scope="rnn")
     self.maxout = maxout_config(dtype=self.dtype, scope="maxout")
     # BUG FIX: the keyword was misspelled `dtypde`, so deepout either raised
     # or silently used linear_config's default dtype instead of self.dtype.
     self.deepout = linear_config(dtype=self.dtype, scope="deepout")
     self.classify = linear_config(dtype=self.dtype, scope="classify")
예제 #10
0
 def age(self):
     """Age in years, lazily parsed from days-to-birth ('UNK' passes through
     unchanged) and cached on first access."""
     if self._age is None:
         raw = utils.get_or_default(self["clin_shared:days_to_birth"],
                                    '#text')
         # days_to_birth is negative; negate and convert days to years.
         self._age = raw if raw == 'UNK' else -int(raw) / 365.25
     return self._age
예제 #11
0
    def __init__(self, cfg, fold=0):
        """Shopee classification module: fits a label encoder on the training
        csv and builds the backbone selected by ``cfg['model']``.

        Raises:
            Exception: if the configured model name is not supported.
        """
        super().__init__()
        self.fold = fold
        self.cfg = cfg
        trn_params = cfg['train_params']

        self.batch_size = get_or_default(trn_params, 'batch_size', 16)
        self.num_workers = get_or_default(trn_params, 'num_workers', 2)
        self.aug_type = get_or_default(cfg, 'aug', '0')
        self.margin_start = get_or_default(cfg, 'margin_start', 10)
        self.csv_path = get_or_default(cfg, 'csv_path',
                                       'input/train_folds.csv')
        self.trn_path = get_or_default(cfg, 'image_path', 'input/train')

        # Fit the label encoder on the full csv so class ids are stable
        # across folds.
        self.le = LabelEncoder()
        train = pd.read_csv(self.csv_path)
        self.le.fit(train.label_group)
        num_classes = len(self.le.classes_)

        # DEAD-STORE FIX: `self.model = model` (the *name* string) was
        # assigned here and then unconditionally overwritten below (the only
        # other path raises), so the assignment is dropped.
        model = get_or_default(cfg, 'model', 'ShopeeModelTimm')
        if model == 'ShopeeModelTimm':
            self.model = ShopeeModelTimm(num_classes, backbone=cfg['backbone'])
        elif model == 'ShopeeModelResnext':
            self.model = ShopeeModelResnext(num_classes=num_classes)
        else:
            raise Exception('unsupported model {}'.format(model))

        self.crit = nn.CrossEntropyLoss()
        self.acc = Accuracy()
        print('using fold', self.fold)
예제 #12
0
def extract_tags(probs_with_backpointers):
    """Follow back-pointers through the Viterbi lattice, newest word first.

    Parameters:
        probs_with_backpointers: per-position dicts, each mapping a word to
            {tag: {'prob': ..., 'parent': ...}}.

    Returns:
        list of (word, tag) pairs in reverse sentence order.
    """
    res = []
    top_tag = get_top_tag(probs_with_backpointers[-1])

    # IDIOM FIX: the loop variable was named `dict`, shadowing the builtin.
    for entry in reversed(probs_with_backpointers):
        prev_top_tag = top_tag
        word = list(entry.keys())[0]
        res.append((word, top_tag))
        # Fall back to the current tag when no parent pointer is recorded
        # (TODO in the original: default to the previous position's top tag).
        top_tag = utils.get_or_default(entry, [word, top_tag, 'parent'], prev_top_tag)

    return res
예제 #13
0
    def train_dataloader(self):
        """Build the shuffled training DataLoader for the current fold."""
        trn_aug = get_aug(cfg=self.cfg)

        train = pd.read_csv(self.csv_path)
        # BUG FIX: `cfg` below was a bare name that only resolved via a
        # module-level global (NameError when this class is imported
        # elsewhere); use the config stored on the module instead.
        trn_ds = RanzcrDs(df=train[train.fold != self.fold].reset_index().drop(
            'index', axis=1).drop('fold', axis=1),
                          aug=trn_aug,
                          path=self.trn_path,
                          logits_path=get_or_default(self.cfg, 'logits',
                                                     'logits.npy'))

        trn_dl = torch.utils.data.DataLoader(trn_ds,
                                             shuffle=True,
                                             batch_size=self.batch_size,
                                             num_workers=self.num_workers)
        return trn_dl
예제 #14
0
    def __init__(self, cfg):
        """RANZCR training module wrapping RanzcrModel with a BCE loss."""
        super().__init__()
        self.cfg = cfg
        self.model = RanzcrModel(cfg=cfg)

        params = cfg['train_params']
        self.fold = get_or_default(params, 'fold', 0)
        self.batch_size = get_or_default(params, 'batch_size', 16)
        self.num_workers = get_or_default(params, 'num_workers', 2)

        # Augmentation type and data locations come from the top-level config.
        self.aug_type = get_or_default(cfg, 'aug', '0')
        self.csv_path = get_or_default(cfg, 'csv_path',
                                       'input/train_folds.csv')
        self.trn_path = get_or_default(cfg, 'image_path', 'input/train')

        self.crit = nn.BCEWithLogitsLoss()
        print('using fold', self.fold)
예제 #15
0
    def __init__(self, cfg, fold=0):
        """Classification module with optional segmentation supervision,
        mixup, and a gradient-reversal domain-adaptation head.

        All auxiliary-objective switches are read from cfg['train_params'].
        """
        super().__init__(cfg=cfg, fold=fold)

        trn_params = cfg['train_params']
        backbone = get_or_default(cfg['model'], 'backbone',
                                  'tf_efficientnet_b4_ns')
        self.model = timm.create_model(backbone,
                                       pretrained=True,
                                       num_classes=4,
                                       in_chans=1)

        self.seg_supervision = bool(
            get_or_default(trn_params, 'seg_supervision', False))
        self.seg_supervision_weight = float(
            get_or_default(trn_params, 'seg_supervision_weight', 0.1))
        self.grad_starvation_fix = bool(
            get_or_default(trn_params, 'grad_starvation_fix', False))
        self.grad_rev = bool(get_or_default(trn_params, 'grad_rev', True))

        # Mixup is enabled by the mere presence of the 'mixup' key.
        if 'mixup' in trn_params:
            self.mixup = True
            self.mixup_alpha = float(
                get_or_default(trn_params, 'mixup_alpha', 1.0))
        else:
            self.mixup = False
            self.mixup_alpha = 0.0

        self.seg_head = get_seg_head(in_channels=feat_map_size[backbone],
                                     out_channels=1)
        self.seg_crit = torch.nn.BCEWithLogitsLoss()

        # Two-way domain classifier behind a gradient-reversal layer.
        self.dom_head = nn.Sequential(
            GradientReversal(),
            nn.Linear(feat_map_size[backbone], 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 2),
            nn.ReLU(),
        )
        self.acc_metric = Accuracy()
        # CONSISTENCY FIX: reuse the trn_params local instead of re-indexing
        # cfg['train_params'].  NOTE(review): a missing 'csv' key passes None
        # to pd.read_csv, which fails obscurely -- consider requiring it.
        self.df = pd.read_csv(get_or_default(trn_params, 'csv', None))
        self.crit = torch.nn.CrossEntropyLoss()
예제 #16
0


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, required=True)
    parser.add_argument('--resume', type=str, required=False)
    args = parser.parse_args()

    config = benedict.from_yaml(args.config)
    module = RanzcrModule(cfg=config)

    # Both early stopping and checkpointing track the averaged ROC-AUC.
    callbacks = [
        EarlyStopping(monitor='val/_avg_roc_auc',
                      verbose=True,
                      patience=20,
                      mode='max'),
        LearningRateMonitor(),
        ModelCheckpoint(monitor='val/_avg_roc_auc',
                        save_top_k=3,
                        mode='max'),
    ]
    trainer = pl.Trainer(gpus=1,
                         max_epochs=100,
                         callbacks=callbacks,
                         logger=TensorBoardLogger("lightning_logs",
                                                  name=args.config),
                         precision=get_or_default(config, 'precision', 32),
                         gradient_clip_val=float(
                             get_or_default(config, 'grad_clip', 0)))

    trainer.fit(module)
예제 #17
0
 def tumor_tissue_site(self):
     """Tumor tissue site from the clinical record, cached on first access."""
     if self._tumor_tissue_site is None:
         node = self['clin_shared:tumor_tissue_site']
         self._tumor_tissue_site = utils.get_or_default(node, '#text')
     return self._tumor_tissue_site
예제 #18
0
 def pathologic_stage(self):
     """Pathologic stage from the nested stage_event record, cached."""
     if self._pathologic_stage is None:
         stage_event = self['shared_stage:stage_event']
         self._pathologic_stage = utils.get_or_default(
             stage_event['shared_stage:pathologic_stage'], '#text')
     return self._pathologic_stage
예제 #19
0
 def gender(self):
     """Patient gender, read once from the clinical record and cached."""
     if self._gender is None:
         node = self['shared:gender']
         self._gender = utils.get_or_default(node, '#text')
     return self._gender
예제 #20
0
 def histological_type(self):
     """Histological type from the clinical record, cached on first access."""
     if self._histological_type is None:
         node = self['shared:histological_type']
         self._histological_type = utils.get_or_default(node, '#text')
     return self._histological_type
예제 #21
0
 def clinical_stage(self):
     """Clinical stage from the nested stage_event record, cached."""
     if self._clinical_stage is None:
         stage_event = self['shared_stage:stage_event']
         self._clinical_stage = utils.get_or_default(
             stage_event['shared_stage:clinical_stage'], '#text')
     return self._clinical_stage
예제 #22
0
 def __init__(self, **kwargs):
     """Configuration for the bidirectional GRU encoder."""
     self.dtype = get_or_default(kwargs, "dtype", theano.config.floatX)
     self.scope = get_or_default(kwargs, "scope", "encoder")
     # One GRU config per direction.
     for direction in ("forward", "backward"):
         name = direction + "_rnn"
         setattr(self, name, gru_config(dtype=self.dtype, scope=name))
예제 #23
0
        return optimizer

    def train_dataloader(self):
        """DataLoader over the random-tile training dataset (unshuffled)."""
        return DataLoader(self.train_ds,
                          batch_size=self.batch_size,
                          shuffle=False,
                          num_workers=self.num_workers)

    def val_dataloader(self):
        """DataLoader over the validation tile dataset (unshuffled)."""
        return DataLoader(self.valid_ds,
                          batch_size=self.batch_size,
                          shuffle=False,
                          num_workers=self.num_workers)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, required=True)
    parser.add_argument('--resume', type=str, required=False)
    args = parser.parse_args()

    cfg = benedict.from_yaml(args.config)
    module = HubmapModule(cfg)

    early_stop = EarlyStopping(monitor='val/avg_dice', verbose=True, patience=50, mode='max')
    logger = TensorBoardLogger("lightning_logs", name=args.config)
    lrm = LearningRateMonitor()
    mdl_ckpt = ModelCheckpoint(monitor='val/avg_dice', save_top_k=5, )
    precision = get_or_default(cfg, 'precision', 32)
    # BUG FIX: the config key was misspelled 'cril_grad', so a 'clip_grad'
    # setting was silently ignored.  The misspelled key is still honoured as
    # a fallback so existing configs keep working.
    clip_grad = get_or_default(cfg, 'clip_grad',
                               get_or_default(cfg, 'cril_grad', 0.0))
    trainer = pl.Trainer(gpus=1, max_epochs=200, callbacks=[early_stop, lrm, mdl_ckpt], logger=logger,
                         precision=precision, gradient_clip_val=clip_grad)

    trainer.fit(module)
예제 #24
0
 def __init__(self, **kwargs):
     """Configuration for an embedding table."""
     fetch = lambda key, default: get_or_default(kwargs, key, default)
     self.dtype = fetch("dtype", theano.config.floatX)
     self.scope = fetch("scope", "embedding")
     self.initializer = fetch("initializer", uniform_initializer)
     # Bias is always enabled and zero-initialised.
     self.bias = option(use=True, initializer=zeros_initializer)