Example #1
def fashion_mnist():
    (X_train_full,
     y_train_full), (_, _) = keras.datasets.fashion_mnist.load_data()
    X_train_full = X_train_full.astype(np.float32) / 255.0
    X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
    y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(100, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ])

    n_epochs = 5
    batch_size = 32
    n_steps = len(X_train) // batch_size
    optimizer = keras.optimizers.Nadam(learning_rate=0.01)  # "lr" is deprecated
    loss_fn = keras.losses.sparse_categorical_crossentropy
    mean_loss = keras.metrics.Mean()
    metrics = [keras.metrics.SparseCategoricalAccuracy()]

    with trange(1, n_epochs + 1, desc="All epochs") as epochs:
        for epoch in epochs:
            with trange(1,
                        n_steps + 1,
                        desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
                for _ in steps:
                    X_batch, y_batch = random_batch(X_train, y_train)
                    with tf.GradientTape() as tape:
                        y_pred = model(X_batch)
                        main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                        loss = tf.add_n([main_loss] + model.losses)
                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(
                        zip(gradients, model.trainable_variables))
                    for variable in model.variables:
                        if variable.constraint is not None:
                            variable.assign(variable.constraint(variable))
                    status = OrderedDict()
                    mean_loss(loss)
                    status["loss"] = mean_loss.result().numpy()
                    for metric in metrics:
                        metric(y_batch, y_pred)
                        status[metric.name] = metric.result().numpy()
                    steps.set_postfix(status)
                y_pred = model(X_valid)
                status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
                status["val_accuracy"] = np.mean(
                    keras.metrics.sparse_categorical_accuracy(
                        tf.constant(y_valid, dtype=np.float32), y_pred))
                steps.set_postfix(status)
            for metric in [mean_loss] + metrics:
                metric.reset_states()
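Example #1 relies on a `random_batch` helper that the snippet does not define. A minimal sketch, assuming NumPy arrays and sampling with replacement:

def random_batch(X, y, batch_size=32):
    # Draw a random mini-batch (with replacement) from the training set.
    idx = np.random.randint(len(X), size=batch_size)
    return X[idx], y[idx]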
Example #2
    def CalculateUniqueEnvironmentsOld(self):
        """ Calculate unique Environments

        This algorithm to calculate unique environments compares all possible
        permutations of the environments. It is thus always correct but often
        very slow.

        The number of operations scales roughly as as N^2 * M! where
        N is the total number of environments and M is the number of atoms in
        the environments.

        CalculateUniqueEnvironmentsFast is a faster alternative to this
        function.

        Args:
            none

        Returns:
            no value
        """

        num_of_templates = self.allEnvs.shape[0]
        flag_unique = np.ones(num_of_templates)
        same_as = np.arange(num_of_templates)
        # Disregard empty environments
        for i in range(num_of_templates):
            if self.allEnvs[i].indeces.shape[0] == 0:
                flag_unique[i] = 0
        for i in trange(num_of_templates, desc="Environment 1", leave=False):
            if self.allEnvs[i].indeces.shape[0] > 0 and flag_unique[i] == 1:
                for j in trange(i + 1, num_of_templates, desc="Environment 2", leave=False):
                    # Not empty, unique, and same number of neighbors
                    if (self.allEnvs[j].indeces.shape[0] > 0 and flag_unique[j] == 1
                            and self.allEnvs[i].indeces.shape[0] == self.allEnvs[j].indeces.shape[0]):
                        # Compare against all permutations
                        for perm in permutations(np.arange(self.allEnvs[j].indeces.shape[0])):
                            p = np.array(perm)
                            # If both have the same vectors, the second environment is not unique
                            if (np.count_nonzero(np.isclose(self.allEnvs[i].delta,
                                                            self.allEnvs[j].delta[p, :],
                                                            atol=self.tolerance))
                                    == self.allEnvs[i].indeces.shape[0] * 3):
                                flag_unique[j] = 0
                                same_as[j] = i
                                break
        self.uniqueEnvs = np.ndarray((0,), dtype=object)
        self.degeneracy = np.ndarray((0,), dtype=object)
        for i in range(num_of_templates):
            if flag_unique[i] == 1:
                self.uniqueEnvs = np.append(self.uniqueEnvs, self.allEnvs[i])
                self.degeneracy = np.append(self.degeneracy, np.count_nonzero(same_as == i))
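The factorial cost documented above can often be avoided by comparing environments in a canonical form instead of over all permutations. This is only a sketch of that idea (not the CalculateUniqueEnvironmentsFast implementation, which is not shown), and it assumes coordinates are separated by more than the rounding tolerance:

def canonical_form(delta, decimals=6):
    # Round away floating-point noise, then sort the displacement vectors
    # lexicographically: two environments that differ only by a permutation
    # of their atoms become identical arrays, so comparing them costs
    # O(M log M) instead of O(M!).
    d = np.round(delta, decimals)
    return d[np.lexsort(d.T[::-1])]

Two environments i and j are then duplicates when np.array_equal(canonical_form(envs[i].delta), canonical_form(envs[j].delta)) holds.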
Example #3
def linkhdf5_one_chrom(chrom, name, cell_id_splits, temp_dir, impute_list, name2=None):
	f = h5py.File(os.path.join(temp_dir, "%s_%s.hdf5" % (chrom, name)), "w")
	if name2 is not None:
		f1 = h5py.File(os.path.join(temp_dir, "%s_%s.hdf5" % (chrom, name2)), "r")
	bar = trange(len(np.concatenate(cell_id_splits)))
	for i, ids in enumerate(cell_id_splits):
		ids = np.copy(ids)
		with h5py.File(os.path.join(temp_dir, "%s_%s_part_%d.hdf5" % (chrom, name, i)), "r") as input_f:
			if i == 0:
				f.create_dataset('coordinates', data=input_f['coordinates'])
			for cell in ids:
				try:
					v1 = np.array(input_f["cell_%d" % (cell)])
					if name2 is not None:
						v2 = np.array(f1["cell_%d" % (cell)])
						v = v1 / np.mean(v1) + v2 / np.mean(v2)
					else:
						v = v1 / np.mean(v1)
					f.create_dataset('cell_%d' % cell, data=v, compression="gzip", compression_opts=6)
				except KeyError:
					# this cell is missing from the part file; skip it
					pass
				bar.update(1)

	f.close()
	if name2 is not None:
		f1.close()
	print ("start removing temp files")
	for i in range(len(cell_id_splits)):
		os.remove(os.path.join(temp_dir, "%s_%s_part_%d.hdf5" % (chrom, name, i)))
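A quick sanity check of the merged file might look as follows; the chromosome and name values here are placeholders:

with h5py.File(os.path.join(temp_dir, "chr1_imputed.hdf5"), "r") as f:
    print(f["coordinates"].shape)  # shared coordinate table
    print(len([k for k in f if k.startswith("cell_")]), "cells merged")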
Example #4
def source_separation(Xfn,
                      max_iter=1000,
                      step_size=1e-2,
                      tol=1e-6,
                      history=False):
    # Xfn: STFT mixtures with shape (n_channels, n_freqs, n_frames)
    Yfn = np.zeros_like(Xfn)
    # one (n_channels x n_channels) demixing matrix per frequency bin
    Wf = np.zeros((Xfn.shape[1], Xfn.shape[0], Xfn.shape[0]), dtype=Xfn.dtype)
    ratio = 0
    n = Xfn.shape[1]
    for i in trange(n, desc='freqs'):
        Xn = Xfn[:, i, :]

        V, Z = whitening(Xn)
        U, _ = complex_FastICA(Z, max_iter=max_iter, history=history)
        W, Y, conv = MLE(Xn,
                         V,
                         U,
                         max_iter=max_iter,
                         step_size=step_size,
                         tol=tol,
                         history=history)

        Yfn[:, i, :] = Y
        Wf[i, :, :] = W
        ratio += conv / n
    print("Convergence ratio: {:.2f}%".format(100 * ratio))
    return Wf, Yfn, ratio
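The `whitening` helper is not included in the snippet. A minimal sketch, assuming `Xn` holds one (n_channels, n_frames) slice of possibly complex STFT data:

def whitening(Xn):
    # Decorrelate the channels and scale them to unit variance: V is the
    # whitening matrix and Z = V @ Xn has (approximately) identity covariance.
    d, E = np.linalg.eigh(np.cov(Xn))
    V = E @ np.diag(d ** -0.5) @ E.conj().T
    return V, V @ Xn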
Example #5
    def __call__(self, train_ds, val_ds, model, optimizer, scheduler):
        avail_device = get_avail_device()

        with trange(self.epochs) as monitor:

            for epoch in monitor:
                train_loss, val_loss, val_score = [], [], []
                monitor.set_description("Epoch %s" % epoch)

                model.train()
                # Training
                for batch_id, (batch, label) in enumerate(
                        tqdm(train_ds,
                             position=0,
                             desc="Training",
                             leave=False)):
                    batch, label = batch.to(avail_device), label.to(
                        avail_device)
                    loss, _ = model.run(batch, label)

                    loss.backward()
                    train_loss.append(loss.item())

                    optimizer.step()
                    optimizer.zero_grad()
                    scheduler.step()

                model.eval()
                # Validate
                with torch.no_grad():
                    for batch_id, (batch, label) in enumerate(
                            tqdm(val_ds,
                                 position=0,
                                 desc="Validation",
                                 leave=False)):
                        batch, label = batch.to(avail_device), label.to(
                            avail_device)
                        loss, score = model.run(batch, label)

                        val_score.append(score)
                        val_loss.append(loss.item())

                ep_train_loss = np.mean(train_loss)
                ep_val_loss = np.mean(val_loss)
                ep_val_score = np.mean(val_score)
                stop_training = self.es(ep_val_score)

                self.train_hist.append(
                    (ep_train_loss, ep_val_loss, ep_val_score,
                     scheduler.get_last_lr()[0]))

                postfix = dict(train_loss=ep_train_loss,
                               val_loss=ep_val_loss,
                               lb_score=ep_val_score,
                               best_epoch=self.es.best_epoch)

                monitor.set_postfix(**postfix)

                if stop_training: break
                torch.save(model.state_dict(), MODEL_FILE.format(epoch=epoch))
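The `self.es` early-stopping callable is not part of the snippet. A minimal sketch that matches how it is used above (returns True when training should stop and tracks `best_epoch`; all names are assumptions, and a higher validation score is assumed to be better):

class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.best_score = -float("inf")
        self.best_epoch = 0
        self._epoch = 0
        self._stale = 0

    def __call__(self, score):
        # Count epochs without improvement; stop after `patience` of them.
        self._epoch += 1
        if score > self.best_score:
            self.best_score = score
            self.best_epoch = self._epoch
            self._stale = 0
        else:
            self._stale += 1
        return self._stale >= self.patience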
Example #6
    def run_trainer(self):

        if self.notebook:
            from tqdm.notebook import tqdm, trange
        else:
            from tqdm import tqdm, trange

        progressbar = trange(self.epochs, desc="Progress")

        for i in progressbar:

            self.epoch += 1

            self._train()

            if self.validation_DataLoader is not None:
                self._validate()

            if self.lr_scheduler is not None:
                if (self.validation_DataLoader is not None
                        and self.lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau'):
                    # ReduceLROnPlateau steps on the validation loss
                    self.lr_scheduler.step(self.validation_loss[i])
                else:
                    self.lr_scheduler.step()

        if self.test_DataLoader is not None:
            self._test()

        return self.training_loss, self.validation_loss, self.validation_IoU, self.test_loss, self.test_IoU
Example #7
    def run_trainer(self):

        if self.notebook:
            from tqdm.notebook import tqdm, trange
        else:
            from tqdm import tqdm, trange

        progressbar = trange(self.epochs, desc='Progress')
        for i in progressbar:
            """Epoch counter"""
            self.epoch += 1  # epoch counter
            """Training block"""
            self._train()
            """Validation block"""
            if self.validation_DataLoader is not None:
                self._validate()
            """Learning rate scheduler block"""
            if self.lr_scheduler is not None:
                if self.validation_DataLoader is not None and self.lr_scheduler.__class__.__name__ == 'ReduceLROnPlateau':
                    self.lr_scheduler.batch(
                        self.validation_loss[i]
                    )  # learning rate scheduler step with validation loss
                else:
                    # self.lr_scheduler.batch()  # learning rate scheduler step
                    self.lr_scheduler.step()  # learning rate scheduler step
        return self.training_loss, self.validation_loss, self.learning_rate, self.mIoU, self.pixel_accuracy
Example #8
    def evaluate(self, max_steps: int = 100):
        try:
            total_steps, total_penalties = 0, 0
            episodes = 100

            for episode in trange(episodes):
                # reset environment to a new, random state
                state = self.env.reset()
                nb_steps, penalties, reward = 0, 0, 0

                done = False

                while not done:
                    action = self._get_action_for_state(state)
                    state, reward, done, info = self.env.step(action)

                    if reward == -10:
                        penalties += 1

                    nb_steps += 1
                    if nb_steps == max_steps:
                        done = True

                total_penalties += penalties
                total_steps += nb_steps

            print(f"Results after {episodes} episodes:")
            print(f"Average timesteps per episode: {total_steps / episodes}")
            print(
                f"Average penalties per episode: {total_penalties / episodes}")
        except KeyboardInterrupt:
            pass
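`_get_action_for_state` is the agent's learned policy; the -10 penalty suggests a Taxi-style Gym environment. For a tabular Q-learning agent it would typically be a greedy lookup, sketched here under the assumption of a `q_table` attribute:

def _get_action_for_state(self, state):
    # Greedy action with respect to the learned Q-values.
    return np.argmax(self.q_table[state])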
Example #9
def train(dataset, model, batch_size, n_epochs):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    optimizer = torch.optim.AdamW(model.parameters())
    loss_fn = torch.nn.MSELoss()
    epochs = trange(n_epochs, desc="training")
    best = {"loss": sys.float_info.max}
    loss_history = []
    for e in epochs:
        epoch_loss = 0
        for batch in dataloader:
            optimizer.zero_grad()
            given = batch["given"].cuda()
            guess = model(given)
            answer = batch["answer"].cuda()
            loss = loss_fn(answer, guess)
            loss.backward()
            epoch_loss += loss.item()
            optimizer.step()
        loss_history.append(epoch_loss)
        epochs.set_postfix_str(f"loss: {epoch_loss:.6f}")
        if epoch_loss < best["loss"]:
            # state_dict() returns live references; clone the tensors so later
            # training steps do not overwrite the best snapshot
            best["state"] = {k: v.detach().clone()
                             for k, v in model.state_dict().items()}
            best["loss"] = epoch_loss
            best["epoch"] = e + 1
    return best, loss_history
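A typical call site for the returned snapshot (hypothetical names for the dataset and model):

best, loss_history = train(dataset, model, batch_size=64, n_epochs=30)
model.load_state_dict(best["state"])  # roll the model back to its best epoch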
Example #10
def workon():
    if schd._num_steps == 0:
        if step_code not in self.interface.waiting_list:
            print(f'Not queued: [{step_code}].')
            return
        else:
            # wait until it is ready
            while schd._num_steps == 0:
                time.sleep(0.2)
    sup_bar = trange(schd._num_steps, desc=f'[{step_code}]')
    for step in sup_bar:
        n_fin_workers = len(schd._succeeded_workers[step]) \
            if step in schd._succeeded_workers.keys() else 0
        total_workers = len(schd._queues[step])
        sub_bar = progressbar(total=total_workers,
                              desc=f'substep::{step}',
                              initial=n_fin_workers)
        if self.is_failed(step_code, idx=step):
            sub_bar.sp(bar_style='danger')
            break
        while n_fin_workers < total_workers:
            cur_fin_workers = len(schd._succeeded_workers[step])
            delta = cur_fin_workers - n_fin_workers
            if delta > 0:
                n_fin_workers += delta
                sub_bar.update(delta)
            time.sleep(0.2)
        if self.is_failed(step_code, idx=step):
            # change bar color to red if any failed workers were found
            sub_bar.sp(bar_style='danger')
            sup_bar.sp(bar_style='danger')
            sub_bar.write(f'Step [{step_code}] has been stopped.')
            self._stop(step_code)
        sub_bar.close()
Example #11
    def getBagRatios(self,inputType,componentInfo=None):
        """
        Arguments:
        - inputType : {true posterior,
                       estimated posterior,
                       nnpu raw,
                       nnpu transform,
                       nnpu estimated rank,
                       nnpu true rank}
        - componentInfo : Required if inputType in {"true posterior","nnpu true rank"}
            - See self.formatComponentInfo for details

        """
        # NClusters x NBags list containing the density ratio for the unlabeled
        # points from the specified bag in the specified cluster
        self.bagRatios = []
        self.debugLabels = []
        self.debugPosteriors = []
        self.formatComponentInfo(componentInfo)
        for cnum in trange(self.n_clusters):
            if inputType == "true posterior":
                ratios = self.trueDensityRatio(cnum)
            elif inputType == "estimated posterior":
                ratios = self.densityRatioFromEstimatedPNPosterior(cnum)
            elif inputType == "ratio estimation":
                ratios = self.ratioEstimation(cnum)
            elif "nnpu" in inputType:
                ratios = self.estimateClusterDensityRatio(cnum,inputType.replace("nnpu ",""))
            elif inputType == "sugiyama":
                ratios = self.sugiyama(cnum)
            else:
                raise ValueError("Invalid inputType specified: {}".format(inputType))
            self.bagRatios.append(self.splitRatiosIntoBags(ratios,cnum))
Example #12
    def get_description_data(self):
        col = ['company', 'intro', 'segment', 'industry']

        self.des_df = pd.DataFrame(columns=col)
        self.init_driver()

        try_num = 0
        single_iter = 0
        while True:
            for t in trange(try_num, len(self.company)):
                try:
                    # DataFrame.append was removed in pandas 2.0; use pd.concat
                    row = pd.DataFrame([self.get_single_des(self.company[t])],
                                       columns=col)
                    self.des_df = pd.concat([self.des_df, row],
                                            ignore_index=True)
                    single_iter = 0
                except Exception:
                    # restart the driver and retry; skip the company after
                    # five consecutive failures
                    self.driver.close()
                    single_iter += 1
                    time.sleep(10)
                    self.init_driver()
                    if single_iter >= 5:
                        single_iter = 0
                        continue
                    try_num = t
                    break
            else:
                break
        return self.des_df
Example #13
    def get_statement_data(self):
        col = [
            'company', 'mkt_cap', 'pb_ratio', 'beta', 'profit_m', 'roa', 'roe'
        ]
        self.ratio_df = pd.DataFrame(columns=col)
        self.init_driver()

        try_num = 0
        single_iter = 0
        while True:
            for t in trange(try_num, len(self.company)):
                try:
                    # DataFrame.append was removed in pandas 2.0; use pd.concat
                    row = pd.DataFrame(
                        [self.get_single_statement(self.company[t])],
                        columns=col)
                    self.ratio_df = pd.concat([self.ratio_df, row],
                                              ignore_index=True)
                    single_iter = 0
                except Exception:
                    # restart the driver and retry; skip the company after
                    # five consecutive failures
                    self.driver.close()
                    single_iter += 1
                    time.sleep(10)
                    self.init_driver()
                    if single_iter >= 5:
                        single_iter = 0
                        continue
                    try_num = t
                    break
            else:
                break
        return self.ratio_df
Example #14
    def EM(self, NIters=50):
        true = self.trueEta[:, self.clusterMap]
        self.eta = np.ones((len(self.bags), self.n_clusters)) * self.clusterAlphaHats
#         self.eta = np.copy(self.trueEta)
        plt.scatter(self.eta.ravel(), true.ravel())
        plt.plot([0,1],[0,1])
        plt.title("Eta MAE: {:.3f} - AUC: {:.3f}".format(np.nanmean(np.abs(self.eta - true)),
                                                       self.getAUC()))
        cmap = plt.get_cmap(name="tab20")
        plt.show()
        for em_iter in trange(NIters):
            colors = []
            for cnum in range(self.n_clusters):
                for bagNum,b in enumerate(self.bags):
                    colors.append(cmap(cnum))
                    ratios = self.bagRatios[cnum][bagNum]
                    eij = self.eta[bagNum,cnum]
#                     comp = self.components[cnum]
#                     unlabeled = b.x_unlabeled[b.unlabeled_cluster_assignment == cnum]
#                     # Positive density
#                     f1 = ss.multivariate_normal.pdf(unlabeled,mean=comp.posMean,cov=comp.posCov)
#                     # Negative density
#                     f0 = ss.multivariate_normal.pdf(unlabeled,mean=comp.negMean,cov=comp.negCov)
#                     f = eij * f1 + (1 - eij) * f0
#                     f = f / f.sum()
#                     self.eta[bagNum,cnum] = np.dot(posts, f)
                    # EM update: posterior that an unlabeled point is positive,
                    # assuming `ratios` holds the negative/positive density ratio f0/f1
                    posts = eij / (eij + (1 - eij) * ratios)
                    self.eta[bagNum, cnum] = np.mean(posts)
            plt.scatter(self.eta.ravel(), true.ravel(),s=25 * self.gamma.ravel(),color=colors)
            plt.plot([0,1],[0,1])
            plt.title("Eta MAE: {:.3f} - AUC: {:.3f}".format(np.nanmean(np.abs(self.eta - true)),
                                                             self.getAUC()))
            plt.show()
Example #15
def simulation(theta, config_details):
    """
        theta: list in R^d where d is the number of parameters
        config_details: dictionary where config_details[i] contains the index corresponding
                        to the config file in configs and the name associated to theta (for changing the config)
    """

    for i, val in enumerate(theta):
        config_type = config_details[i]["config"]
        config_name = config_details[i]["name"]
        # Make sure actual integers have type int, e.g. val for "testing_delay"
        if val % 1 == 0:
            val = int(val)
        configs[config_type][config_name] = val

    factor_config = utils.get_sub_dictionary(configs["policy_config"],
                                             config.DELVE_CASE_FACTOR_KEYS)
    strategy_config = utils.get_sub_dictionary(
        configs["policy_config"], config.DELVE_STRATEGY_FACTOR_KEYS)

    rng = np.random.RandomState(42)
    simulate_contacts = EmpiricalContactsSimulator(over18, under18, rng)
    tti_model = TTIFlowModel(rng, **strategy_config)

    outputs = list()

    for _ in trange(n_cases):
        case = simulate_case(rng, **configs["case_config"])
        case_factors = CaseFactors.simulate_from(rng, case, **factor_config)
        contacts = simulate_contacts(case, **configs["contacts_config"])
        res = tti_model(case, contacts, case_factors)
        outputs.append(res)
Example #16
def approx_predict_ts(X, X_df, gen_X, ts_mdl, dist_metric='euclidean',
                      lookback=0, filt_fn=None, X_scaler=None, y_scaler=None,
                      progress_bar=False, no_info=np.array([[0]])):
    b_size = gen_X[0][0].shape[0]
    preds = None
    if progress_bar:
        rng = trange(X.shape[0], desc='Predicting')
    else:
        rng = range(X.shape[0])
    for i in rng: 
        x = X[i]
        if filt_fn is not None:
            X_filt_df, x = filt_fn(X_df, x, lookback)
        else:
            X_filt_df = X_df
        idx = find_closest_datapoint_idx(x, X_filt_df, dist_metric, find_exact_first=1, scaler=X_scaler)
        
        nidx = idx - lookback
        pred = ts_mdl.predict(gen_X[nidx // b_size][0])[nidx % b_size].reshape(1, -1)
        if i == 0:
            preds = pred
        else:
            preds = np.vstack((preds, pred))
    if preds is not None:
        if y_scaler is not None:
            return y_scaler.inverse_transform(preds)
        else:
            return preds
    else:
        return no_info
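`find_closest_datapoint_idx` is not shown. A rough sketch of what it must do, given how it is used (the `find_exact_first` fast path is omitted, and every name here is an assumption):

def find_closest_datapoint_idx(x, X_df, dist_metric='euclidean',
                               find_exact_first=0, scaler=None):
    # Return the positional index of the row of X_df nearest to x.
    from scipy.spatial.distance import cdist
    candidates = X_df.to_numpy()
    query = x.reshape(1, -1)
    if scaler is not None:
        candidates = scaler.transform(candidates)
        query = scaler.transform(query)
    return int(cdist(query, candidates, metric=dist_metric).argmin())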
Example #17
    def __iter__(self):
        for i in trange(self.epoch,
                        self.num_epochs,
                        initial=self.epoch,
                        total=self.num_epochs):
            self.epoch = i
            yield i
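The `initial`/`total` pair is what lets the bar resume mid-run after a checkpoint restore. A self-contained context for the method above (hypothetical class name):

from tqdm import trange

class EpochCounter:
    def __init__(self, num_epochs, start_epoch=0):
        self.num_epochs = num_epochs
        self.epoch = start_epoch  # e.g. restored from a checkpoint

    def __iter__(self):
        for i in trange(self.epoch, self.num_epochs,
                        initial=self.epoch, total=self.num_epochs):
            self.epoch = i
            yield i

for epoch in EpochCounter(100, start_epoch=42):
    pass  # the bar starts at 42/100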
Example #18
    def get_run_loop(self, show_pbar: bool = None) -> Iterable[int]:
        """
        Return a tqdm progress bar or a regular range iterator.

        If the code is running in an IPython kernel it will also display the \
        internal ``_notebook_container``.

        Args:
            show_pbar: If ``False`` the progress bar will not be displayed.

        Returns:
            A Progressbar if ``show_pbar`` is ``True`` and the code is running \
            in an IPython kernel. If the code is running in a terminal the logging \
            level must be set at least to "INFO". Otherwise return a range iterator \
            for ``self.max_range`` iteration.

        """
        show_pbar = show_pbar if show_pbar is not None else self.show_pbar
        # disable tqdm unless a bar was requested and, in a terminal,
        # the logging level is below WARNING
        no_tqdm = not (show_pbar if self._ipython_mode else
                       self._log.level < logging.WARNING and show_pbar)
        if self._ipython_mode:
            from tqdm.notebook import trange
        else:
            from tqdm import trange

        loop_iterable = trange(self.max_epochs,
                               desc="%s" % self.__class__.__name__,
                               disable=no_tqdm)

        if self._ipython_mode and self._use_notebook_widget:
            from IPython.core.display import display

            display(self._notebook_container)
        return loop_iterable
Example #19
    def fit(self):

        try:
            self.config = Config(self.config_file)
            if self.tensorboard is None:
                self.log_dir = os.path.join(self.config.log_dir,
                                            self.model_name)
                self.tensorboard = LogTensorBoard(log_dir=self.log_dir)
            self.tensorboard.set_model(self.model)

            if self.memory is None:
                self.memory = Memory(max_len=self.config.max_queue_length)

            state = self.env.reset()
            done = False
            epsilon = self._get_epsilon(self.current_episode)
            steps_in_episode = 0
            reward_queue = deque(maxlen=10)
            reward_in_episode = 0

            pbar = trange(self.last_step,
                          self.config.train_steps,
                          initial=self.last_step,
                          total=self.config.train_steps)
            for step in pbar:
                steps_in_episode += 1
                self.last_step = step

                # Greedy exploration strategy
                action = self._choose_action(state, epsilon)
                new_state, reward, done, info = self.env.step(action)
                self._remember(state, action, reward, new_state, done)
                reward_in_episode += reward

                if steps_in_episode == self.config.max_steps_per_episode:
                    done = True

                # Train with the Bellman equation
                if step > self.config.warmup_steps:
                    self._train_model(step)

                state = new_state

                if done:
                    steps_in_episode = 0
                    state = self.env.reset()
                    done = False
                    self.current_episode += 1
                    reward_queue.append(reward_in_episode)
                    reward_in_episode = 0
                    epsilon = self._get_epsilon(self.current_episode)
                    pbar.set_postfix({"reward": np.mean(reward_queue)})

                if step % self.config.target_model_update == 0:
                    self.target_model.set_weights(self.model.get_weights())

            self.last_step += 1

        except KeyboardInterrupt:
            print("Training has been interrupted")
Example #20
    def train(self):
        self.head.train()
        history = []
        for epoch in trange(self.train_epochs,
                            desc='NER Train Epochs',
                            leave=False):
            losses = []
            for batch in tqdm(self.train_dataloader,
                              desc='NER train batch',
                              leave=False):
                for key in batch.keys():
                    batch[key] = batch[key].cuda()

                self.optimizer.zero_grad()

                ids = batch['text_ids']
                mask = batch['mask']
                targets = batch['target']

                with torch.no_grad():
                    embeddings = self.lm_model(ids,
                                               mask,
                                               return_embeddings=True)

                loss = self.head(embeddings, mask, targets)
                loss.backward()
                self.optimizer.step()

                losses.append(loss.item())
            history.append(losses)
        return history
Example #21
def train_agent(agent, env, N_ep, save_name=None, show_progress=False):
    """
	Train an agent for a given number of episodes in a given environment
	...
	
	Parameters
	----------
	agent : EQLM.QAgent
		QLearning agent
	env : gym.Wrapper
		Envrionment on which the agent is trained
	N_ep : int
		Number of episodes
	save_name : str, optional
		Name of file to save results, by default does not save
	show_progress : bool, optional
		Displays a tqdm notebook progress bar
		
	Returns
	-------
	R_ep : list
		Cumulative reward for each episode
	steps : list
		Number of environment steps in each episode
	agent : EQLM.QAgent
		The trained agent
	"""
    R_ep = []
    steps = []
    if show_progress:
        t = trange(N_ep, desc='bar_desc', leave=True)
    else:
        t = range(N_ep)
    for ep_no in t:
        s = env.reset()
        done = False
        Rt = 0
        n_step = 0
        while not done:
            a = agent.action_select(s)
            s, r, done, _ = env.step(a)
            agent.update(s, r, done)
            Rt += r
            n_step += 1
        R_ep.append(Rt)
        steps.append(n_step)
        if show_progress:
            window = R_ep[-10:] if ep_no > 10 else R_ep
            t.set_description('R: {} Step: {}'.format(
                np.mean(window).round(1), n_step))
            t.refresh()
        if save_name:
            data = {'params': agent.nn.get_params(), 'R': R_ep, 'step': steps}
            pickle.dump(data, open(save_name, 'wb'))
    return R_ep, steps, agent
Example #22
    def predict(self, x_seq, u_seq):
        for _ in trange(self.per_iter, desc='ILQR', leave=False):
            k_seq, kk_seq = self.cal_K(x_seq, u_seq)

            x_seq, u_seq = self.forward(x_seq, u_seq, k_seq, kk_seq)

        u_seq[-1] = u_seq[-2]  # pad: no control is computed for the final step
        return np.array(x_seq), np.array(u_seq)
Example #23
    def __init__(self, config, dataset):
        self.config = config
        self.model = EmbMLP(self.config, dataset.song_vectors,
                            dataset.tag_vectors)
        self.optimizer = keras.optimizers.Adam(config.lr)

        for epoch in trange(1, config.epochs + 1):  # 1-based, inclusive of the last epoch
            # train
            generator = iter(dataset.generate_input('train',
                                                    config.batch_size))
            N = len(dataset.train_plylst_list)
            steps_per_epoch = (N // config.batch_size) + 1
            loss_list = list()
            for step in trange(1, steps_per_epoch + 1):
                input, label = next(generator)
                label = label[0]
                loss = self.train_batches(input, label)
                loss_list.append(loss.numpy())
                if step % 100 == 0:
                    print(
                        f"epoch/step {epoch}/{step}\t|\tavg. loss: {np.mean(loss_list)}"
                    )

            print(f"epoch {epoch}\t|\tavg. loss: {np.mean(loss_list)}")

            # evaluate
            generator = iter(dataset.generate_input('val', config.batch_size))
            N = len(dataset.val_plylst_list)
            steps_per_epoch = (N // config.batch_size) + 1
            song_ndcg_list = list()
            tag_ndcg_list = list()
            for step in trange(steps_per_epoch):
                input, (song_label, tag_label) = next(generator)
                logits = self.eval_batches(input)
                song_logits, tag_logits = tf.split(
                    logits, (config.n_songs, config.n_tags), -1)
                song_mask = 1 - song_label  # zero out logits where song_label == 1
                song_masked_logits = song_mask * song_logits
                # top_k returns a (values, indices) named tuple; rank by indices
                top100 = tf.math.top_k(song_masked_logits, k=100).indices

                for i in range(top100.shape[0]):
                    song_nDCG = self._ndcg(top100[i, :].numpy().tolist())
                    song_ndcg_list.append(song_nDCG)

            score = 0.85 * np.mean(song_ndcg_list)
            print(f"epoch {epoch}\t|\tscore: {score}")
Example #24
    def __init__(
            self,
            model,
            optimizer,
            scheduler,
            patience,
            metric_fn,
            min_epochs,
            max_epochs,
            dataset_sizes,
            early_stop_on_metric=False,
            lower_is_better=True,
            keep_best_models=False,
            verbose=True
        ):
        """
        Monitor class for logging progress of the
        model during training and validation.
        """
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.patience = patience
        self.metric_fn = metric_fn
        self.min_epochs = min_epochs
        self.max_epochs = max_epochs
        self.dataset_sizes = dataset_sizes
        self.early_stop_on_metric = early_stop_on_metric
        self.lower_is_better = lower_is_better
        self.verbose = verbose
        
        if verbose:
            self.iter_epochs = trange(max_epochs)
        else:
            self.iter_epochs = range(max_epochs)
            
        if lower_is_better:
            self.epoch_loss = {"train": np.inf, "valid": np.inf}
            self.epoch_metric = np.inf
            self.best_loss = np.inf
            self.best_metric = np.inf
        else:
            self.epoch_loss = {"train": -np.inf, "valid": -np.inf}
            self.epoch_metric = -np.inf
            self.best_loss = -np.inf
            self.best_metric = -np.inf
            
        self.train_loss = list()
        self.valid_loss = list()
        self.valid_metric = list()
            
        self.best_model_state = model.state_dict()
        self.best_models = list()
        self.keep_best_models = keep_best_models

        self.epoch_counter = {"train": 0, "valid": 0}
        self.es_counter = 0
        self.running_loss = 0.0
Example #25
    def range_test(self):
        step_num = 0  # renamed from `iter`, which shadows the built-in
        smoothing = 0.05
        self.loss = []
        self.lr = []

        #criterion = nn.CrossEntropyLoss()
        # exponentially sweep the LR from start_lr to end_lr over the whole run
        lr_lambda = lambda x: math.exp(
            x * math.log(self.end_lr / self.start_lr)
            / (self.epochs * len(self.dataloader)))
        scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer,
                                                      lr_lambda)

        for i in trange(self.epochs):
            for inputs, labels in tqdm(self.dataloader):

                # Send to device
                inputs = inputs.to(self.model.device)
                labels = labels.to(self.model.device)

                # Training mode and zero gradients
                self.model.train()
                self.optimizer.zero_grad()

                # Get outputs to calc loss
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

                # Backward pass
                loss.backward()
                self.optimizer.step()

                # Update LR
                scheduler.step()
                lr_step = self.optimizer.state_dict()["param_groups"][0]["lr"]
                self.lr.append(lr_step)

                # smooth and record the loss as a plain float, not a live tensor
                loss_value = loss.item()
                if step_num > 0:
                    loss_value = (smoothing * loss_value
                                  + (1 - smoothing) * self.loss[-1])
                self.loss.append(loss_value)

                step_num += 1

        plt.ylabel("loss")
        plt.xlabel("Learning Rate")
        plt.xscale("log")
        plt.plot(self.lr, self.loss)
        plt.show()

        self.model.load_state_dict(self.modelstate)
        self.optimizer.load_state_dict(self.optimstate)

        return (self.lr[self.loss.index(min(self.loss))])
Example #26
  def findLR(self):
    step_num = 0  # renamed from `iter`, which shadows the built-in
    smoothing = 0.05
    self.loss = []
    self.lr = []
    #print("Epochs - ", self.epochs)

    # Set up optimizer and loss function for the experiment with our ResNet model
    optimizer = torch.optim.SGD(self.model.parameters(), self.start_lr)
    criterion = nn.CrossEntropyLoss() 
    lr_lambda = lambda x: math.exp(
        x * math.log(self.end_lr / self.start_lr)
        / (self.epochs * self.trainlen / self.batch_size))
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)


    for i in trange(self.epochs):
      print("epoch {}".format(i))
      for inputs, labels in tqdm(self.dataloader):
        
        # Send to device
        inputs = inputs.to(self.model.device)
        labels = labels.to(self.model.device)
        
        # Training mode and zero gradients
        self.model.train()
        optimizer.zero_grad()
        
        # Get outputs to calc loss
        outputs = self.model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Update LR
        scheduler.step()
        lr_step = optimizer.state_dict()["param_groups"][0]["lr"]
        self.lr.append(lr_step)

        # smooth and record the loss as a plain float, not a live tensor
        loss_value = loss.item()
        if step_num > 0:
          loss_value = (smoothing * loss_value
                        + (1 - smoothing) * self.loss[-1])
        self.loss.append(loss_value)

        step_num += 1
      
    plt.ylabel("loss")
    plt.xlabel("Learning Rate")
    plt.xscale("log")
    plt.plot(self.lr, self.loss)
    plt.show()

    return(self.lr[self.loss.index(min(self.loss))])
Example #27
    def forward(self):
        if exists(self.start_image):
            tqdm.write('Preparing with initial image...')
            optim = DiffGrad(self.model.parameters(), lr = self.start_image_lr)
            pbar = trange(self.start_image_train_iters, desc='iteration')
            for _ in pbar:
                loss = self.model.model(self.start_image)
                loss.backward()
                pbar.set_description(f'loss: {loss.item():.2f}')

                optim.step()
                optim.zero_grad()

                if terminate:
                    print('interrupted by keyboard, gracefully exiting')
                    return exit()

            del self.start_image
            del optim

        tqdm.write(f'Imagining "{self.textpath}" from the depths of my weights...')

        with torch.no_grad():
            self.model(self.clip_encoding, dry_run=True) # do one warmup step due to potential issue with CLIP and CUDA

        if self.open_folder:
            open_folder('./')
            self.open_folder = False

        for epoch in trange(self.epochs, desc='epochs'):
            pbar = trange(self.iterations, desc='iteration')
            for i in pbar:
                _, loss = self.train_step(epoch, i)
                pbar.set_description(f'loss: {loss.item():.2f}')

                if terminate:
                    print('interrupted by keyboard, gracefully exiting')
                    return
            # Update clip_encoding per epoch if we are creating a story
            if self.create_story:
                self.clip_encoding = self.update_story_encoding(epoch, i)

        self.save_image(epoch, i) # one final save at end
Example #28
    def train(self):
        '''
        This function handles the entirety of the training, dev, and scoring.
        '''
        # tell the user general metrics
        self.logger.info(f"Number of examples: {len(self.train_examples)}")
        self.logger.info(f"Batch size: {self.args.batch_size}")
        self.logger.info(
            f"Number of optimization steps: {self.num_train_optimization_steps}"
        )

        # instantiate dataloader
        train_dataloader = DataLoader(self.train_examples,
                                      batch_size=self.args.batch_size,
                                      shuffle=True,
                                      num_workers=self.args.num_workers,
                                      drop_last=False,
                                      collate_fn=collate_squad_train)
        # for each epoch
        for epoch in trange(int(self.args.epochs), desc="Epoch"):
            # train
            self.train_epoch(train_dataloader)
            # get dev loss
            dev_loss = BertQAEvaluator(self.model, self.processor,
                                       self.args).get_loss()
            # get scoring logits and indices
            logits, indices = BertQAEvaluator(self.model, self.processor,
                                              self.args).get_scores()
            # compute scores
            metrics = BertQAEvaluator(self.model, self.processor,
                                      self.args).score_squad_val(
                                          shuffled_idx=indices,
                                          logits=logits,
                                          n_best_size=20,
                                          max_answer=30)
            # print validation results
            # logging uses %-style args; use an f-string for {}-style formatting
            self.logger.info(
                f"Epoch {epoch + 1:d}, Dev/Exact {metrics['exact']:0.3f}, "
                f"Dev/F1 {metrics['f1']:0.3f}")

            # update validation results
            if metrics['f1'] > self.best_dev_f1:
                self.unimproved_iters = 0
                self.best_dev_f1 = metrics['f1']
                torch.save(self.model, self.snapshot_path)

            else:
                # stop training with early stopping
                self.unimproved_iters += 1
                if self.unimproved_iters >= self.args.patience:
                    self.early_stop = True
                    self.logger.info(
                        f"Early Stopping. Epoch: {epoch}, Best Dev F1: {self.best_dev_f1}"
                    )
                    break
Example #29
def scrape(start_year, end_year):

    # List of games, each game is a dictionary object
    # List of meta information to avoid multiple additions
    games = []
    meta = []
    url = 'https://www.covers.com/sport/basketball/nba/teams/main/'

    game_types = {
        'R': ['Regular Season'],
        'P': ['Playoffs'],
        'B': ['Regular Season', 'Playoffs']
    }

    #start_year_input = input('\nInput starting year of season that you want ' +
    #                    'to start scraping from  (e.g. 2015 if start at 2015): ')
    #end_year_input = input('\nInput ending year of season that you want to ' +
    #                    'end scraping at (e.g. 2017 if scraping 2015-2016' +
    #                    'and 2016-2017 season')
    #try:
    #    start_year = int(start_year_input)
    #    end_year = int(end_year_input)
    #except:
    #    sys.exit('Invalid Year')
    specs = 'R'
    #specs = input('\nScrape Regular Season (R), Playoff Games (P), or both (B): (default R)')
    #specs= 'R' if specs== '' else specs
    if specs not in game_types:
        sys.exit('Invalid Game Specs')

    for year in trange(start_year, end_year, desc='Years', leave=True):
        t = tqdm(teamDict.keys(), desc='Teams', leave=False)
        print('Initializing Records for ' + str(year) + '...')
        records = buildTable(year + 1)
        print('Getting conference lists ...')
        conference_list = getConferences(year + 1)
        for team_abbr in t:
            if (teamDict[team_abbr] not in conference_list['eastern']
                    and teamDict[team_abbr] not in conference_list['western']):
                continue
            t.set_description(team_abbr)
            t.refresh()
            team_name = teamDict[team_abbr].lower()
            team_url = url + team_name.replace(" ", "-") + \
                    '/' + str(year) + '-' + str(year+1)
            # Send in built table for record scraping
            add_games, add_meta = bettingLinesScraper(team_url, team_abbr,
                                                      year, game_types[specs],
                                                      records, conference_list,
                                                      meta)

            games = games + add_games
            meta = meta + add_meta
    return games
Example #30
    def __init__(self, bags, n_clusters):
        self.bags = bags
        self.n_clusters = n_clusters
        self.findGlobalClusters()
        self.tau_posS = []
        self.tau_uS = []
        self.clusterAlphaHats = np.zeros(n_clusters)
        for cnum in trange(n_clusters, desc="transforms and class prior estimation"):
            self.estimateClusterClassPrior(cnum)

        self.getEta()