def compare(model1, model2, validation, max_iter=-1):
    """Yield the per-sample validation losses of two models on the same data."""
    # `device`, `Gym`, and `EMD` are expected to be in scope at module level.
    for m in [model1, model2]:
        g = Gym(m,
                device,
                lambda i, t: EMD.torch_auto(i, t, mean=False),
                None,
                validation,
                max_validation_steps=max_iter)
        yield np.hstack(g.validation_loss())
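A minimal usage sketch, assuming two trained models and a validation loader of the kind Gym expects (model_a, model_b, and val_loader are illustrative names, not from the source):

losses_a, losses_b = compare(model_a, model_b, val_loader, max_iter=50)
print(f"mean EMD per sample: {losses_a.mean():.4f} vs {losses_b.mean():.4f}")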
Example #2
def val_loss_and_auc(model: Gym):
    """Per-sample validation losses plus the ROC AUC of loss as an anomaly score."""
    loss_func = lambda p, t: EMD.torch_auto(p, t, mean=False)
    val_losses = np.hstack(model.validation_loss(loss_func))  # restack batches
    # Samples whose MC_name is not "valid" are treated as anomalies (class 1).
    names = model.data_val.dataset.df["MC_name"].values
    truth = (names != "valid").astype(int)
    pred = val_losses
    fpr, tpr, _ = metrics.roc_curve(truth, pred)
    auc = metrics.auc(fpr, tpr)
    return val_losses, auc
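The same roc_curve/auc pattern as a self-contained check; the scores and the normal/anomalous split below are synthetic, purely for illustration:

import numpy as np
from sklearn import metrics

rng = np.random.default_rng(0)
scores = np.concatenate([rng.normal(0.1, 0.05, 1000),   # "valid" reconstructions
                         rng.normal(0.3, 0.10, 100)])   # anomalous ones
truth = np.concatenate([np.zeros(1000), np.ones(100)])
fpr, tpr, _ = metrics.roc_curve(truth, scores)
print(f"AUC = {metrics.auc(fpr, tpr):.3f}")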
Example #3
    def __init__(
        self,
        name,
        gym_factory,
        auc_classes=None,
        batches=1000,
        val_loss_func=lambda p, t: EMD.torch_auto(p, t, mean=False),
        num_workers=1,
        add_attributes=None,  # avoid a mutable {} default argument
        catch_ctrl_c=False,
    ):
        super().__init__(name, num_workers, catch_ctrl_c)
        self.gym_factory = gym_factory
        self.batches = batches
        self.val_loss_func = val_loss_func
        self.auc_classes = auc_classes
        self.add_attributes = add_attributes if add_attributes is not None else {}
Example #4
    def _setup(self, config):
        name = config.get("model_factory")
        self.model_factory = mappings.models[name]
        self.model = self.model_factory(config)

        # controls how often workers report model performance
        self.batches_per_step = config.get("batches_per_step", 1)

        self.max_validation_steps = config.get("max_validation_steps", 10)

        # GPU or CPU?
        use_cuda = config.get("use_gpu") and torch.cuda.is_available()
        print("CUDA:", use_cuda)
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.model.to(self.device)

        self.batch_size = config.get("batch_size", None)

        self.verbose = config.get("verbose", False)

        optimizer = config.get("optimizer", "Adam")
        if optimizer == "SGD":
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=config.get("lr", 0.01),
                momentum=config.get("momentum", 0.9),
            )
        elif optimizer == "Adam":
            self.optimizer = optim.Adam(self.model.parameters())
        else:
            raise NotImplementedError(f"unknown optimizer: {optimizer}")

        # per-sample validation loss; the training loss is looked up by name
        self.validation_loss_F = lambda p, t: EMD.torch_auto(p, t, mean=False)

        loss_name = config.get("training_loss", "mse_loss")
        self.train_loss_F = mappings.losses[loss_name]
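
A sketch of a config dict that would satisfy this _setup, assuming mappings.models and mappings.losses contain the referenced keys; the "conv_ae" name is illustrative, not from the source:

config = {
    "model_factory": "conv_ae",      # key into mappings.models (illustrative)
    "batches_per_step": 5,
    "max_validation_steps": 10,
    "use_gpu": True,
    "batch_size": 256,
    "optimizer": "SGD",
    "lr": 0.01,
    "momentum": 0.9,
    "training_loss": "mse_loss",     # key into mappings.losses
}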

Example #5

best_loss = argmedian(runs.loss_train)  # index of the run with the median training loss (project helper)
best_auc = np.argmax(runs.auc)          # index of the run with the best AUC
gym = training.best_model_factory()
model = gym.model
model.load_state_dict(runs.model[best_loss].to_dict())
model.eval()

# %%
losses = []
with torch.no_grad():  # inference only; skip autograd bookkeeping
    for d in tqdm(data):
        d = d["data"].to(device)
        pred = model(d)
        loss = EMD.torch_auto(pred, d, mean=False).flatten().cpu().numpy()
        losses.append(loss)
losses = np.hstack(losses)

# %%
# `separations` (defined earlier) labels each waveform; 0 marks the valid class.
unique_seps = np.unique(separations)
loss_valid = losses[separations == 0]

aucs = []
for sep in unique_seps:
    if sep == 0:
        continue
    loss_peak = losses[separations == sep]
    classes = [0] * len(loss_valid) + [1] * len(loss_peak)
    auc = analysis.calc_auc(np.hstack((loss_valid, loss_peak)), classes)
    aucs.append(auc)
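analysis.calc_auc is project code that is not shown here; the following is a plausible stand-in using sklearn, an assumption about what it computes rather than the project's implementation:

from sklearn import metrics

def calc_auc(scores, classes):
    # Hypothetical stand-in for analysis.calc_auc: ROC AUC of `scores`
    # as a detector for class 1.
    fpr, tpr, _ = metrics.roc_curve(classes, scores)
    return metrics.auc(fpr, tpr)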
Example #6
def ks_test(observation_pdf, pdf):
    """Two-sample KS test between the flattened arrays; returns the p-value."""
    ks_stat, p_value = stats.ks_2samp(observation_pdf.reshape(-1), pdf.reshape(-1))
    return p_value
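A quick self-contained check of how stats.ks_2samp behaves, on synthetic data (illustrative only):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
p_same = stats.ks_2samp(rng.normal(size=500), rng.normal(size=500)).pvalue
p_diff = stats.ks_2samp(rng.normal(size=500), rng.normal(1.0, 1.0, size=500)).pvalue
print(f"same distribution: p={p_same:.2f}; shifted: p={p_diff:.2e}")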

all_ks = []
losses_val = []
val_classes = []
with torch.no_grad():
    for d in tqdm(model.data_val):
        data = d["data"].to(model.device)
        mc_type = d["MC_type"]  # avoid shadowing the builtin `type`
        pred = model.model(data)
        losses_val.append(EMD.torch_auto(pred, data, mean=False).cpu().numpy())
        pred = pred.cpu().numpy()
        data = data.cpu().numpy()

        val_classes.append(mc_type)
        # per-waveform KS p-value between input and reconstruction
        ks = [ks_test(data[i], pred[i]) for i in range(len(pred))]
        all_ks.append(ks)
ks = np.hstack(all_ks)
losses_val = np.hstack(losses_val)
val_classes = np.hstack(val_classes)

#%%
calc_auc(losses_val, val_classes != 0)
#%%
# Small KS p-values mean input and reconstruction differ, so negate the
# p-values to get a score where larger = more anomalous.
interactive.save_value("AUC for KS as metric", calc_auc(-ks, val_classes != 0), ".2f")
# %%
Example #7
        except:
            # fall back to the class name if the model has no explicit name
            modelname = model.model.__class__.__name__
        name = f"{i}, Model: {modelname}, Loss: {lossname}"
        print(f"Training {name}:")

        loss = model.train_batches(steps)

        # one x position per recorded loss value, scaled to waveforms seen
        x = np.linspace(0, (steps + 1) * dl_config["batch_size"], len(loss))
        plt.plot(x, loss)
        plt.xlabel("# of waveforms used for training")
        plt.ylabel(f"loss {lossname}")
        plt.xscale("log")
        plt.figtext(0, 0, name)
        plt.show_and_save(f"{name} training")

        loss_func = lambda p, t: EMD.torch_auto(p, t, mean=False)
        val_losses = np.hstack(
            model.validation_loss(loss_func))  # restack batches
        names = dataset_val.df["MC_name"].values

        # first pass only fixes a common binning; the plot itself is discarded
        _, bins, _ = plt.hist(val_losses,
                              int(np.sqrt(len(val_losses))),
                              label="everything")
        plt.clf()

        # redraw as a stacked histogram, one component per MC_name
        unames = np.unique(names)
        data = [val_losses[names == name] for name in unames]
        plt.hist(data, bins=bins, label=unames, stacked=True)
        plt.xlabel("EMD loss")
        plt.ylabel("frequency")
        plt.legend()