Example #1
    def train(self,
              num_iterations: int,
              save_path: Optional[str] = None,
              disable_tqdm: bool = False,
              **collect_kwargs):

        print(f"Begin training, logged in {self.path}")
        timer = Timer()
        step_timer = Timer()

        # Store the first agent
        # saved_agents = [copy.deepcopy(self.agent.model.state_dict())]

        if save_path:
            torch.save(self.agent.model,
                       os.path.join(save_path, "base_agent.pt"))

        rewards = []

        for step in trange(num_iterations, disable=disable_tqdm):
            ########################################### Collect the data ###############################################
            timer.checkpoint()

            # data_batch = self.collector.collect_data(num_episodes=self.config["episodes"])
            data_batch = self.collector.collect_data(
                num_steps=self.config["steps"])
            data_time = timer.checkpoint()
            ############################################## Update policy ##############################################
            # Perform the PPO update
            metrics = self.ppo.train_on_data(data_batch,
                                             step,
                                             writer=self.writer)

            eval_batch = self.collector.collect_data(num_steps=1001)
            reward = eval_batch['rewards'].sum().item()
            rewards.append(reward)

            end_time = step_timer.checkpoint()

            if step % 500 == 0:
                print(
                    f"Current reward: {reward:.3f}, Avg over last 100 iterations: {np.mean(rewards[-100:]):.3f}"
                )

            # Save the agent to disk
            if save_path:
                torch.save(self.agent.model.state_dict(),
                           os.path.join(save_path, f"weights_{step + 1}"))

            # Write training time metrics to tensorboard
            time_metrics = {
                "agent/time_data": data_time,
                "agent/time_total": end_time,
                "agent/eval_reward": reward
            }

            write_dict(time_metrics, step, self.writer)

        return rewards
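The write_dict helper itself is not shown in these snippets. Judging only from how it is called here (a metrics dict, a step index, and a TensorBoard SummaryWriter), a minimal sketch might look like the following; the body is an assumption, not the project's actual code.

from typing import Dict, Optional

from torch.utils.tensorboard import SummaryWriter


def write_dict(metrics: Dict[str, float], step: int,
               writer: Optional[SummaryWriter] = None):
    # Assumed behaviour: log each scalar under its key at the given step.
    if writer is None:
        return
    for key, value in metrics.items():
        writer.add_scalar(key, value, step)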
Example #2
    def train(self,
              num_iterations: int,
              save_path: Optional[str] = None,
              disable_tqdm: bool = False,
              **collect_kwargs):

        print(f"Begin training, logged in {self.path}")
        timer = Timer()
        step_timer = Timer()

        # Store the first agent
        # saved_agents = [copy.deepcopy(self.agent.model.state_dict())]

        if save_path:
            for path, (agent_id, agent) in zip(self.agent_paths,
                                               self.agents.items()):
                torch.save(agent.model, os.path.join(str(path),
                                                     "base_agent.pt"))

        rewards = []

        for step in trange(num_iterations, disable=disable_tqdm):
            ########################################### Collect the data ###############################################
            timer.checkpoint()

            # data_batch = self.collector.collect_data(num_episodes=self.config["episodes"])
            data_batch = self.collector.collect_data(
                num_steps=self.config["steps"],
                gamma=self.config["gamma"],
                tau=self.config["tau"])
            data_time = timer.checkpoint()
            ############################################## Update policy ##############################################
            # Perform the PPO update
            metrics = self.ppo.train_on_data(data_batch,
                                             step,
                                             writer=self.writer)

            # eval_batch = self.collector.collect_data(num_steps=1001)
            # reward = eval_batch['rewards'].sum().item()
            # rewards.append(reward)

            end_time = step_timer.checkpoint()

            # Save the agent to disk
            if save_path:
                for path, (agent_id, agent) in zip(self.agent_paths,
                                                   self.agents.items()):
                    torch.save(agent.model.state_dict(),
                               os.path.join(str(path), f"weights_{step + 1}"))

            # Write training time metrics to tensorboard
            time_metrics = {
                "agent/time_data": data_time,
                "agent/time_total": end_time,
                # "agent/eval_reward": reward
            }

            write_dict(time_metrics, step, self.writer)
Example #3
def main(args):
    utils.delete_path(args.log_dir)
    utils.delete_path(args.save_dir)
    utils.ensure_path(args.save_dir)
    utils.ensure_path(args.log_dir)
    utils.write_dict(vars(args), os.path.join(args.save_dir, 'arguments.csv'))

    torch.manual_seed(args.seed)
    cudnn.benchmark = True 
    torch.cuda.manual_seed_all(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus

    if args.mode == 'train':
        train(args)
    elif args.mode == 'test':
        test(args)
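Here utils.write_dict(vars(args), ...) saves the run's arguments to a CSV file. A plausible sketch of such a helper, assuming a simple two-column key/value layout, is:

import csv


def write_dict(d, csv_path):
    # Sketch only; the project's real utils.write_dict may differ.
    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        for key, value in d.items():
            writer.writerow([key, value])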
Example #4
import os

import numpy as np
from sklearn.cluster import KMeans

# write_dict is assumed to be this project's utils helper, as imported in the
# other darknet examples on this page.
from utils import write_dict


def main(args):
    fd_label = args.data_path

    hout = args.input_size
    wout = args.input_size
    wh = np.array([])

    for root in os.listdir(fd_label):
        fn_label = os.path.join(fd_label, root)
        f, ext = os.path.splitext(fn_label)
        if ext == ".txt":
            a = np.loadtxt(fn_label)
            if wh.shape[0] == 0:
                wh = a
                if len(wh.shape) == 1:
                    wh = np.expand_dims(wh, axis=0)
            else:
                if len(a.shape) == 1:
                    a = np.expand_dims(a, axis=0)
                wh = np.concatenate((wh, a), axis=0)

    # Columns 3:5 of each label row are taken to be the box width and height
    # (YOLO-style labels are assumed) and rescaled to stride-32 grid units.
    wh0 = wh[:, 3:]
    wh0[:, 0] = wh0[:, 0] * wout / 32
    wh0[:, 1] = wh0[:, 1] * hout / 32

    wh_res = KMeans(n_clusters=args.n_clusters, random_state=0).fit(wh0)

    # Convert the cluster centres back to pixels and sort them by width.
    result = wh_res.cluster_centers_ * 32
    result = result[np.argsort(result[:, 0])]
    str_anchors = ''
    for i, v in enumerate(result):
        if i == (args.n_clusters - 1):
            str_anchors += ' ' + str(int(v[0] + 0.5)) + ',' + str(
                int(v[1] + 0.5))
        else:
            str_anchors += ' ' + str(int(v[0] + 0.5)) + ',' + str(
                int(v[1] + 0.5)) + ','
    print(str_anchors)

    write_dict('input_size', args.input_size)
    write_dict('n_clusters', args.n_clusters)
    write_dict('anchors', str_anchors)
Example #5
            else:
                print(content.replace('\n', ''))
                content = 'subdivisions=32' + '\n'

        if content.startswith('width'):
            content = 'width=' + str(input_size) + '\n'
            print(content.replace('\n', ''))

        if content.startswith('height'):
            content = 'height=' + str(input_size) + '\n'
            print(content.replace('\n', ''))

        if content.startswith('max_batches='):
            content = 'max_batches=' + str(max_batches) + '\n'
            print(content.replace('\n', ''))
            write_dict('max_batches', max_batches)

        if content.startswith('steps'):
            content = 'steps=' + str(step1) + ',' + str(step2) + '\n'
            print(content.replace('\n', ''))
        # The space before the '=' distinguishes this line from the filters
        # entries that should not be modified.
        if content.startswith('filters ='):
            content = 'filters =' + str(int(n_clusters / 3 *
                                            (n_classes + 5))) + '\n'
            print(content.replace('\n', ''))

        if content.startswith('mask'):
            if n_clusters == 6:
                content = 'mask=' + str(n_clusters - mask_index -
                                        2) + ',' + str(n_clusters -
                                                       mask_index - 1) + '\n'
Example #6
File: main.py Project: zac-k/phase-ret-dl
assert len(hyperparameters['Hidden Layer Size']) == len(
    hyperparameters['Activation Functions'])

magnetisation = specimen_parameters[
    'Mass Magnetization'] * specimen_parameters['Density'] * 1000  # A/m

exp_path = paths['Experimental Data Path']
image_output_path = paths['Image Output Path']
phase_output_path = paths['Phase Output Path']
error_output_path = paths['Error Output Path']
load_model_path = paths['Load Model Path']
save_model_path = paths['Save Model Path']
f = open(error_output_path + 'errors.txt', 'w')

n_savefile_sets = hyperparameters['Train/Valid/Test Split']
utils.write_dict(f, hyperparameters)
utils.write_dict(f, simulation_parameters)
utils.write_dict(f, imaging_parameters)
utils.write_dict(f, specimen_parameters)

# Set image size and shape
img_size = imaging_parameters['Image Size in Pixels']
M = imaging_parameters['Multislice Resolution in Pixels']
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)

use_multislice = imaging_parameters['Use Multislice']

# Set size of hidden layers
hidden_layer_size = hyperparameters['Hidden Layer Size']
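In this project utils.write_dict takes an already-open file handle and a parameter dict, so it presumably appends the parameters to the error log. A hedged sketch consistent with that call pattern:

def write_dict(f, d):
    # Assumed behaviour: one "key: value" line per entry, then a blank line.
    for key, value in d.items():
        f.write('{}: {}\n'.format(key, value))
    f.write('\n')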
Example #7
# Initialize the Neural Network
if os.path.exists(network_fname):
    print('Loading existing network file')
    with open(network_fname, 'rb') as fh:
        ntwk = pickle.load(fh)
else:
    print('Building the Network')
    ntwk = nn.NeuralNet(scriber.height, alphabet_size, **nnet_args)
    with open(network_fname, 'wb') as fh:
        pickle.dump(ntwk, fh)


# Print
print('\nArguments:')
utils.write_dict(args)
print('FloatX: {}'.format(th.config.floatX))
print('Alphabet Size: {}'.format(alphabet_size))

################################ Train
print('Training the Network')
for epoch in range(num_epochs):
    ntwk.update_learning_rate(epoch)
    edit_dist, tot_len = 0, 0

    print('Epoch: {} '.format(epoch))
    # keeping 1 backup file as data might get lost, if script is stopped while pickling
    os.rename(network_fname, 'ntwk.bkp.pkl')
    with open(network_fname, 'wb') as fh:
        pickle.dump(ntwk, fh)
    print('Network saved to {}'.format(network_fname))
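Examples #7 and #12 call utils.write_dict(args) right after printing 'Arguments:', so here the helper appears to pretty-print a dict of settings rather than write a file. A minimal sketch under that assumption:

def write_dict(d, indent=0):
    # Assumed behaviour: print each key/value pair, recursing into nested dicts.
    for key, value in d.items():
        if isinstance(value, dict):
            print(' ' * indent + str(key) + ':')
            write_dict(value, indent + 2)
        else:
            print(' ' * indent + '{}: {}'.format(key, value))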
Example #8
 def write_info(self, save_path='./', save_name='Arena Dict.txt'):
     ensure_path(save_path)
     # Incomplete in the original: utils.write_dict is called without
     # arguments, and the opened file is never written to.
     utils.write_dict()
     with open(save_path + save_name, 'w') as f:
         return  # to be implemented
Example #9
    def train(self):

        start_time = time.time()

        self.episodes = self.env.generate_episodes(config.NUM_EPISODES, self)

        # Computing returns and estimating advantage function.
        for episode in self.episodes:
            episode["baseline"] = self.value_func.predict(episode)
            episode["returns"] = utils.discount(episode["rewards"],
                                                config.GAMMA)
            episode["advantage"] = episode["returns"] - episode["baseline"]

        # Updating policy.
        actions_dist_n = np.concatenate(
            [episode["actions_dist"] for episode in self.episodes])
        states_n = np.concatenate(
            [episode["states"] for episode in self.episodes])
        actions_n = np.concatenate(
            [episode["actions"] for episode in self.episodes])
        baseline_n = np.concatenate(
            [episode["baseline"] for episode in self.episodes])
        returns_n = np.concatenate(
            [episode["returns"] for episode in self.episodes])

        # Standardize the advantage function to have mean=0 and std=1.
        advantage_n = np.concatenate(
            [episode["advantage"] for episode in self.episodes])
        advantage_n -= advantage_n.mean()
        advantage_n /= (advantage_n.std() + 1e-8)

        # Computing baseline function for next iter.
        print(states_n.shape, actions_n.shape, advantage_n.shape,
              actions_dist_n.shape)
        feed = {
            self.policy.state: states_n,
            self.action: actions_n,
            self.advantage: advantage_n,
            self.policy.pi_theta_old: actions_dist_n
        }

        episoderewards = np.array(
            [episode["rewards"].sum() for episode in self.episodes])

        #print("\n********** Iteration %i ************" % i)

        self.value_func.fit(self.episodes)
        self.theta_old = self.current_theta()

        def fisher_vector_product(p):
            feed[self.flat_tangent] = p
            return self.session.run(self.fisher_vect_prod,
                                    feed) + config.CG_DAMP * p

        self.g = self.session.run(self.surr_loss_grad, feed_dict=feed)

        self.grad_step = utils.conjugate_gradient(fisher_vector_product,
                                                  -self.g)

        # Scale the conjugate-gradient direction to the largest step that
        # satisfies the KL-divergence constraint (the standard TRPO step size).
        self.sAs = .5 * self.grad_step.dot(
            fisher_vector_product(self.grad_step))

        self.beta_inv = np.sqrt(self.sAs / config.MAX_KL)
        self.full_grad_step = self.grad_step / self.beta_inv

        self.negdot_grad_step = -self.g.dot(self.grad_step)

        def loss(th):
            self.set_theta(th)
            return self.session.run(self.surr_loss, feed_dict=feed)

        self.theta = utils.line_search(loss, self.theta_old,
                                       self.full_grad_step,
                                       self.negdot_grad_step / self.beta_inv)
        self.set_theta(self.theta)

        surr_loss_new = -self.session.run(self.surr_loss, feed_dict=feed)
        KL_old_new = self.session.run(self.KL, feed_dict=feed)
        entropy = self.session.run(self.entropy, feed_dict=feed)

        old_new_norm = np.sum((self.theta - self.theta_old)**2)

        if np.abs(KL_old_new) > 2.0 * config.MAX_KL:
            print("Keeping old theta")
            self.set_theta(self.theta_old)

        stats = {}
        stats["L2 of old - new"] = old_new_norm
        stats["Total number of episodes"] = len(self.episodes)
        stats["Average sum of rewards per episode"] = episoderewards.mean()
        stats["Entropy"] = entropy
        exp = utils.explained_variance(np.array(baseline_n),
                                       np.array(returns_n))
        stats["Baseline explained"] = exp
        stats["Time elapsed"] = "%.2f mins" % (
            (time.time() - start_time) / 60.0)
        stats["KL between old and new distribution"] = KL_old_new
        stats["Surrogate loss"] = surr_loss_new
        self.stats.append(stats)
        utils.write_dict(stats)
        save_path = self.saver.save(self.session, "./checkpoints/model.ckpt")
        print('Saved checkpoint to %s' % save_path)
        for k, v in stats.items():
            print(k + ": " + " " * (40 - len(k)) + str(v))
Example #10
    def train_on_data(self, data_batch: DataBatch,
                      step: int = 0,
                      writer: Optional[SummaryWriter] = None) -> Dict[str, float]:
        """
        Performs a single update step with PPO on the given batch of data.

        Args:
            data_batch: DataBatch, dictionary
            step:
            writer:

        Returns:

        """
        metrics = {}
        timer = Timer()

        entropy_coeff = self.config["entropy_coeff"]

        agent = self.agent
        optimizer = self.optimizer

        agent_batch = data_batch

        ####################################### Unpack and prepare the data #######################################

        if self.config["use_gpu"]:
            agent_batch = batch_to_gpu(agent_batch)
            agent.cuda()

        # Initialize metrics
        kl_divergence = 0.
        ppo_step = -1
        value_loss = torch.tensor(0)
        policy_loss = torch.tensor(0)
        loss = torch.tensor(0)

        batcher = Batcher(agent_batch['dones'].size(0) // self.config["minibatches"],
                          [np.arange(agent_batch['dones'].size(0))])

        # Start a timer
        timer.checkpoint()

        for ppo_step in range(self.config["ppo_steps"]):
            batcher.shuffle()

            # for indices, agent_minibatch in minibatches(agent_batch, self.config["batch_size"], shuffle=True):
            while not batcher.end():
                batch_indices = batcher.next_batch()[0]
                batch_indices = torch.tensor(batch_indices).long()

                agent_minibatch = index_data(agent_batch, batch_indices)
                # Evaluate again after the PPO step, for new values and gradients
                logprob_batch, value_batch, entropy_batch = agent.evaluate_actions(agent_minibatch)

                advantages_batch = agent_minibatch['advantages']
                old_logprobs_minibatch = agent_minibatch['logprobs']  # logprobs of taken actions
                discounted_batch = agent_minibatch['rewards_to_go']

                ######################################### Compute the loss #############################################
                # Surrogate loss
                prob_ratio = torch.exp(logprob_batch - old_logprobs_minibatch)
                surr1 = prob_ratio * advantages_batch
                surr2 = prob_ratio.clamp(1. - self.eps, 1 + self.eps) * advantages_batch
                # surr2 = torch.where(advantages_batch > 0,
                #                     (1. + self.eps) * advantages_batch,
                #                     (1. - self.eps) * advantages_batch)

                policy_loss = -torch.min(surr1, surr2)
                value_loss = 0.5 * (value_batch - discounted_batch) ** 2
                # import pdb; pdb.set_trace()
                loss = (torch.mean(policy_loss)
                        + (self.config["value_loss_coeff"] * torch.mean(value_loss))
                        - (entropy_coeff * torch.mean(entropy_batch)))

                ############################################# Update step ##############################################
                optimizer.zero_grad()
                loss.backward()
                if self.config["max_grad_norm"] is not None:
                    nn.utils.clip_grad_norm_(agent.model.parameters(), self.config["max_grad_norm"])
                optimizer.step()

            # logprob_batch, value_batch, entropy_batch = agent.evaluate_actions(agent_batch)
            #
            # kl_divergence = torch.mean(old_logprobs_batch - logprob_batch).item()
            # if abs(kl_divergence) > self.config["target_kl"]:
            #     break

        agent.cpu()

        # Training-related metrics
        metrics[f"agent/time_update"] = timer.checkpoint()
        metrics[f"agent/kl_divergence"] = kl_divergence
        metrics[f"agent/ppo_steps_made"] = ppo_step + 1
        metrics[f"agent/policy_loss"] = torch.mean(policy_loss).cpu().item()
        metrics[f"agent/value_loss"] = torch.mean(value_loss).cpu().item()
        metrics[f"agent/total_loss"] = loss.detach().cpu().item()
        metrics[f"agent/rewards"] = agent_batch['rewards'].cpu().sum().item()
        metrics[f"agent/mean_std"] = agent.model.std.mean().item()

        # Other metrics
        # metrics[f"agent/mean_entropy"] = torch.mean(entropy_batch).item()

        # Write the metrics to tensorboard
        write_dict(metrics, step, writer)

        return metrics
Example #11
 def _write_token(self, res):
     if self.is_authorized() and res:
         token_file = os.path.join(utils.get_user_home(), TOKEN_FILENAME)
         utils.write_dict(token_file, res)
         return True
Example #12
# Initialize Language
lang.select_labeler(args['labeler'])
alphabet_size = len(lang.symbols)

# Initialize Scriber
scribe_args['dtype'] = th.config.floatX
scriber = Scribe(lang, **scribe_args)
printer = utils.Printer(lang.symbols)

# Initialize the Neural Network
print('Building the Network')
ntwk = nn.NeuralNet(scriber.height, alphabet_size, **nnet_args)

# Print
print('\nArguments:')
utils.write_dict(args)
print('FloatX: {}'.format(th.config.floatX))
print('Alphabet Size: {}'.format(alphabet_size))

################################ Train
print('Training the Network')
for epoch in range(num_epochs):
    ntwk.update_learning_rate(epoch)
    edit_dist, tot_len = 0, 0

    for samp in range(num_samples):
        x, _, y = scriber.get_text_image()
        y_blanked = utils.insert_blanks(y, alphabet_size, num_blanks_at_start=2)
        # if len(y_blanked) < 2:
        #     print(y_blanked, end=' ')
        #     continue
Example #13
#! /usr/bin/env python3

import csv

from utils import read_dict, write_dict

filename = "dict.csv"

rows = read_dict(filename)
write_dict(filename, rows)
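Example #13 round-trips a CSV through read_dict and write_dict; note that here the filename comes first, the reverse of example #3's write_dict(dict, path), so each project's signature has to be read from its own call site. A sketch of a matching pair of helpers, assuming plain CSV rows:

import csv


def read_dict(filename):
    # Assumed behaviour: return the CSV rows as a list of lists.
    with open(filename, newline='') as f:
        return list(csv.reader(f))


def write_dict(filename, rows):
    # Assumed behaviour: rewrite the CSV with the given rows.
    with open(filename, 'w', newline='') as f:
        csv.writer(f).writerows(rows)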
Example #14
# -*- coding: utf-8 -*-
# @Time    : 2019-10-19 14:44
# @Author  : Yan An
# @Contact: [email protected]

import os
import xml.etree.ElementTree as xml_tree

from tqdm import tqdm
from utils import write_dict

path = './darknet/dataset/img'

files = [x for x in os.listdir(path) if x.endswith('xml')]

write_dict('total', len(files))

n_classes = {}
n_classes_index = 0

print('Convert xml to txt')
for file in tqdm(files):
    xml_path = os.path.join(path, file)
    txt_name = xml_path.replace('xml', 'txt')
    if os.path.exists(txt_name):
        continue
    tree = xml_tree.parse(xml_path)
    root = tree.getroot()

    size = root.find('size')
    width = int(size.find('width').text)
Example #15
 def save_vocab(self, vocab_path):
     _freq_dict = OrderedDict(sorted(self._freq_dict.items(), key=lambda kv:kv[1], reverse=True))
     utils.write_dict(vocab_path+'.word2idx.json', self._word2idx)
     utils.write_dict(vocab_path+'.freq.json', _freq_dict)
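save_vocab passes .json file names to utils.write_dict(path, dict), so in this project the helper presumably dumps a dict to JSON. A minimal sketch under that assumption:

import json


def write_dict(path, d):
    # Assumed behaviour: serialize the dict to a JSON file.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(d, f, ensure_ascii=False, indent=2)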
Example #16
import os
import random

from tqdm import tqdm
from utils import write_dict

path = './darknet/dataset/img'

files = os.listdir(path)
pictures = [x for x in files if x.endswith('jpg')]

train_samples = random.sample(pictures, int(0.7 * len(pictures)))

path = os.path.dirname(os.path.abspath(__file__))
f_train = open('./darknet/dataset/' + 'train.txt', 'w', encoding='utf-8')
f_valid = open('./darknet/dataset/' + 'valid.txt', 'w', encoding='utf-8')

print('generate train txt')
for train_sample in tqdm(train_samples):
    f_train.write(
        os.path.join(path, 'darknet/dataset/img/') + train_sample + '\n')

valid_samples = [x for x in pictures if x not in train_samples]

print('generate valid txt')
for valid_sample in tqdm(valid_samples):
    f_valid.write(
        os.path.join(path, 'darknet/dataset/img/') + valid_sample + '\n')

# Close the split files so the lists are flushed to disk before logging counts.
f_train.close()
f_valid.close()

write_dict('train', len(train_samples))
write_dict('valid', len(valid_samples))
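The darknet preprocessing scripts (examples #4, #5, #14 and this one) call write_dict('name', value) to record a single named statistic. The backing store is not shown anywhere on this page; one hedged sketch is a shared JSON file that the helper reads, updates and rewrites (the file name below is hypothetical):

import json
import os

STATS_FILE = 'dataset_stats.json'  # hypothetical location


def write_dict(key, value):
    # Assumed behaviour: upsert one key/value pair into a shared stats file.
    stats = {}
    if os.path.exists(STATS_FILE):
        with open(STATS_FILE, 'r', encoding='utf-8') as f:
            stats = json.load(f)
    stats[key] = value
    with open(STATS_FILE, 'w', encoding='utf-8') as f:
        json.dump(stats, f, ensure_ascii=False, indent=2)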