Example #1
    def train(self, env, episodes, time_steps):
        stats = EpisodeStats(episode_lengths=np.zeros(episodes),
                             episode_rewards=np.zeros(episodes))

        for i_episode in range(1, episodes + 1):
            # Run one episode; the baseline and the policy are updated online
            # after every environment step (no episode buffer is kept here).
            s = env.reset()
            compounded_decay = 1
            for t in range(time_steps):
                a, log_prob_a = self.get_action(s)
                ns, r, d, _ = env.step(a)

                stats.episode_rewards[i_episode - 1] += r
                stats.episode_lengths[i_episode - 1] = t

                target = r
                if not d:
                    target = target + self._gamma * self._V(
                        tt(ns)).cpu().detach()
                baseline = self._V(tt(s))
                advantage = target - baseline
                compounded_decay *= self._gamma
                self._train_baseline(target, baseline)
                self._train_policy(advantage, compounded_decay, log_prob_a)

                if d:
                    break
                s = ns

            print(
                f"{stats.episode_lengths[i_episode-1]} Steps in Episode {i_episode}/{episodes}. Reward {stats.episode_rewards[i_episode-1]}"
            )
        return stats
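
Note: the helper tt used throughout the PyTorch examples is never defined in these snippets; it is applied to states, targets, and batches right before they are fed into a network. A minimal sketch of such a conversion helper (an assumption, not the original definition):

import numpy as np
import torch

def tt(x):
    # Hypothetical numpy-to-tensor helper: wrap an array, list, or scalar as a
    # float32 tensor and move it to the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.as_tensor(np.asarray(x, dtype=np.float32), device=device)
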
Example #2
  def random_next_batch(self, batch_size):
    batch_indices = np.random.choice(len(self._data.states),batch_size)
    batch_states = np.array([self._data.states[i] for i in batch_indices])
    batch_actions = np.array([self._data.actions[i] for i in batch_indices])
    batch_next_state = np.array([self._data.next_states[i] for i in batch_indices])
    batch_rewards = np.array([self._data.rewards[i] for i in batch_indices])
    batch_terminal_flags = np.array([self._data.terminal_flags[i] for i in batch_indices])

    return tt(batch_states), tt(batch_actions), tt(batch_next_state), tt(batch_rewards), tt(batch_terminal_flags)
Example #3
	def Q1(self, s, a):
		s = tt(s)
		a = tt(a)

		if len(s.shape) == 1:
			x = torch.cat((s, a))
		else:
			x = torch.cat((s, a), dim=1)

		q1 = self._Q1(x)

		return q1
Example #4
    def forward(self, x, mu, sigma):

        x = tt(x)
        mu = tt(mu)
        sigma = tt(sigma)

        self._mu = mu
        self._sigma = sigma

        p = 1 / (sigma * np.sqrt(2 * np.pi)) * torch.exp(
            (-1 / 2) * (torch.div(mu - x, sigma)**2))

        return p
Example #5
    def forward(self, x, alpha, beta):
        x = tt(x)
        alpha = tt(alpha)
        beta = tt(beta)

        self._alpha = alpha
        self._beta = beta

        beta_ab = torch.exp((torch.lgamma(alpha) + torch.lgamma(beta) -
                             torch.lgamma(alpha + beta)))

        p = (torch.pow(x, alpha - 1) * torch.pow(1 - x, beta - 1)) / beta_ab

        return p
Example #6
    def mode(self):

        alpha = self._alpha.detach().numpy()
        beta = self._beta.detach().numpy()

        mode = np.zeros(alpha.shape[0])

        indices = np.arange(0, mode.shape[0])

        idx = indices[(alpha > 1) & (beta > 1)]
        mode[idx] = (alpha[idx] - 1) / (alpha[idx] + beta[idx] - 2)

        # Uniform
        idx = indices[(alpha == 1) & (beta == 1)]
        mode[idx] = np.random.uniform(0, 1, len(idx))

        # Bi-Modal
        idx = indices[(alpha < 1) & (beta < 1)]
        mode[idx] = np.random.choice([0, 1], len(idx))

        idx = indices[(alpha <= 1) & (beta > 1)]
        mode[idx] = 0

        idx = indices[(alpha > 1) & (beta <= 1)]
        mode[idx] = 1

        return tt(mode)
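
For alpha, beta > 1 the first branch above uses the closed-form mode of a Beta(alpha, beta) distribution, (alpha - 1) / (alpha + beta - 2); a small self-contained check:

# Mode of Beta(3, 5): (3 - 1) / (3 + 5 - 2) = 1/3
alpha, beta = 3.0, 5.0
mode = (alpha - 1) / (alpha + beta - 2)
assert abs(mode - 1.0 / 3.0) < 1e-12
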
Example #7
 def update(self, X_batch, y_batch):
     self.optimizer.zero_grad()
     y_batch_pred = self.net(tt(X_batch))
     loss = self.criterion(y_batch_pred, _y(y_batch))
     loss.backward()
     self.optimizer.step()
     return loss
Example #8
def list_task(bdstoken):
    url = CLOUD_DL + '?bdstoken=' +  bdstoken + \
    '&need_task_info=1&status=255&start=0&limit=100&method=list_task&app_id=250528&t=' + \
     utils.tt() + '&bdstoken=' +  bdstoken + '&channel=chunlei&clienttype=0&web=1&app_id=250528'
    xml = fetch(url, {}, utils.myname(), {})
    j = json.loads(xml.decode("utf-8"))

    return j
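
In the Baidu Pan client examples, utils.tt() is concatenated into request URLs as the t/tt timestamp parameter (and list_path below computes int(t) + 2 from it). A plausible sketch of that helper, given only as an assumption since the utils module is not shown:

import time

def tt():
    # Hypothetical: current Unix time in milliseconds, returned as a string,
    # matching the cache-busting 't'/'tt' query parameters built above.
    return str(int(time.time() * 1000))
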
Example #9
def query_task(bdstoken, taskid):
    url =  CLOUD_DL + "?bdstoken=" +  bdstoken + "&task_ids=" + \
    taskid + "&op_type=1&method=query_task&app_id=250528&t=" + utils.tt() + \
    "&bdstoken=" +  bdstoken + "&channel=chunlei&clienttype=0&web=1&app_id=250528"
    xml = fetch(url, {}, utils.myname(), {})
    j = json.loads(xml.decode("utf-8"))
    logger.debug("json: %s " % str(j))
    return (j, taskid)
Example #10
def list_task(bdstoken):
	url = CLOUD_DL + '?bdstoken=' +  bdstoken + \
	'&need_task_info=1&status=255&start=0&limit=100&method=list_task&app_id=250528&t=' + \
	 utils.tt() + '&bdstoken=' +  bdstoken + '&channel=chunlei&clienttype=0&web=1&app_id=250528'
	xml = fetch(url,{},utils.myname(),{})
	j = json.loads(xml.decode("utf-8"))
	
	return j
Example #11
    def forward(self, x):

        if not isinstance(x, torch.Tensor):
            x = tt(x)

        x = self._fc1(x)

        return x
Example #12
def query_task(bdstoken,taskid):
	url =  CLOUD_DL + "?bdstoken=" +  bdstoken + "&task_ids=" + \
	taskid + "&op_type=1&method=query_task&app_id=250528&t=" + utils.tt() + \
	"&bdstoken=" +  bdstoken + "&channel=chunlei&clienttype=0&web=1&app_id=250528"
	xml = fetch(url,{},utils.myname(),{})
	j = json.loads(xml.decode("utf-8"))
	logger.debug("json: %s "% str(j))
	return (j,taskid)
Example #13
    def get_action(self, s):
        mu_action = self._pi(tt(s))
        # mu_action = self._pi(tt(s)).detach().numpy()
        action_sampled = np.random.normal(loc=mu_action.detach().numpy(),
                                          scale=0.1,
                                          size=1)
        action_sampled = np.clip(action_sampled, a_min=-1.0, a_max=1.0)

        log_prob = torch.log(mu_action + torch.normal(mean=mu_action))
        return action_sampled, log_prob
Example #14
def login_check(username, token):
    header = {
        'Host': PASSPORT_HOST,
        'Referer': PAN_INDEX,
    }
    cbs = utils.cbs_token()
    url = PASSPORT_API + '/?loginhistory&token=' + token + \
    '&tpl=netdisk&apiver=v3&tt=' + utils.tt() + '&username=' + username + '&isphone=false&callback=' + cbs
    xml = fetch(url, {}, utils.myname(), header)
    xml = utils.fix_json(xml)
    return xml
Example #15
    def get_action(self, s):
        probs = self._pi(tt(s))
        action = np.random.choice(a=self._action_dim,
                                  p=np.squeeze(probs.detach().numpy()))
        log_prob = torch.log(probs.squeeze(0)[action])

        # converting the discrete action [0,1,2,...]
        # to an action in the continuous
        # range (actionspace.low <--> actionspace.high)
        if self.d2c:
            action = self.d2c(action)

        return action, log_prob
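
The d2c callable referenced in the comment maps a discrete action index onto the environment's continuous action range. A minimal sketch under the assumption of an evenly spaced action grid (the original mapping is not shown):

import numpy as np

def make_d2c(low, high, n_actions):
    # Hypothetical discrete-to-continuous mapper: n_actions evenly spaced
    # points between the action-space bounds.
    grid = np.linspace(low, high, n_actions)
    return lambda idx: np.array([grid[idx]])

d2c = make_d2c(-1.0, 1.0, 5)  # e.g. 5 discrete actions on [-1, 1]
print(d2c(2))                 # -> [0.]
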
Example #16
def get_token():
    header = {
        'Host': PASSPORT_HOST,
        'Referer': PAN_INDEX,
    }
    cbs = utils.cbs_token()
    login_init = utils.tt()
    url = PASSPORT_API + '/?getapi&tpl=netdisk&apiver=v3&tt='+ \
    login_init + '&class=login&logintype=basicLogin&callback=' + cbs
    logger.debug('url:: %s ' % url)
    xml = fetch(url, {}, utils.myname(), header)
    xml = utils.fix_json(xml.decode('utf-8'))
    token = json.loads(xml)['data']['token']
    logger.debug("token:%s" % token)

    return token
Example #17
    def forward(self, x):

        if not isinstance(x, torch.Tensor):
            x = tt(x)

        for i in range(len(self.layers) - 1):
            x = self.layers[i](x)
            if self._hidden_non_linearity is not None:
                x = self._hidden_non_linearity(x)

        x = self.layers[-1](x)

        if self._output_non_linearity is not None:
            x = self._output_non_linearity(x)

        return x
Example #18
def login(rsakey, pubkey, username, password, token):
    url = PASSPORT_API + '/?login'
    login_start = utils.tt()
    header = {
        'Host': PASSPORT_HOST,
        'Referer': PAN_INDEX,
        'Origin': PAN_INDEX,
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    logger.debug("encrypted pw: %s " % RSA_encrypt(pubkey, password))
    post = {
        'apiver': 'v3',
        'callback': 'parent.' + utils.cbs_token(),
        'charset': 'utf-8',
        'codestring': '',
        'isPhone': 'false',
        'loginmerge': 'true',
        'logintype': 'basicLogin',
        'mem_pass': '******',
        'password': RSA_encrypt(pubkey, password),  #password,
        'ppui_logintime':
        str(random.randint(52000, 58535)),  #int(login_start)-int(login_init),
        'quick_user': '******',
        'safeflg': '0',
        'staticpage':
        'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'token': token,
        'tpl': 'netdisk',
        'tt': login_start,
        'u': PAN_INDEX,
        'username': username,
        'verifycode': '',
        'subpro': '',
        'logLoginType': 'pc_loginBasic',
        'crypttype': '12',
        'rsakey': rsakey,
        'idc:': '',
    }
    xml = fetch(url, post, utils.myname(), header).decode('utf-8')
    img = re.search('"(err_no=[^"]*)"', xml).group(1)
    import urllib.parse
    idict = dict(urllib.parse.parse_qsl(img))
    logger.debug("idict : %s" % idict)

    return (xml, idict)
Example #19
def get_public_key(token):

    header = {
        'Host': PASSPORT_HOST,
        'Referer': PAN_INDEX,
    }
    cbs = utils.cbs_token()

    url = 'https://passport.baidu.com/v2/getpublickey?token=' + \
    token + '&tpl=netdisk&apiver=v3&tt=' + utils.tt() + '&callback=' + cbs
    xml = fetch(url, {}, utils.myname(), header).decode('utf-8')
    keystr = re.search(r"\(([^\)]*)\)",
                       xml).group(1).replace("'", '"').replace('\t', '')
    logger.debug("key str:%s" % keystr)
    keydict = eval(keystr)
    logger.debug("keydict:%s" % keydict)
    rsakey = keydict['key']
    pubkey = keydict['pubkey']
    logger.debug("rsakey:%s" % rsakey)
    logger.debug("pubkey:%s" % pubkey)
    return (rsakey, pubkey)
Example #20
    def train(self, env, episodes, time_steps):
        stats = EpisodeStats(episode_lengths=np.zeros(episodes),
                             episode_rewards=np.zeros(episodes))

        for i_episode in range(1, episodes + 1):
            # Generate an episode.
            # An episode is an array of (state, action, reward) tuples
            episode = []
            s = env.reset()
            for t in range(time_steps):
                a, log_prob_a = self.get_action(s)
                ns, r, d, _ = env.step(a)

                stats.episode_rewards[i_episode - 1] += r
                stats.episode_lengths[i_episode - 1] = t

                episode.append((s, a, log_prob_a, r))

                if d:
                    break
                s = ns

            # collect all rewards at one place
            T = len(episode)
            G = 0.0

            for t in reversed(range(T)):
                s, a, log_prob, r = episode[t]
                G = self._gamma * G + r

                baseline = self._V(tt(s))
                advantage = G - baseline
                self._train_baseline(G, baseline)
                self._train_policy(advantage, t, log_prob)

            print("\r{} Steps in Episode {}/{}. Reward {}".format(
                len(episode), i_episode, episodes,
                sum([e[3] for i, e in enumerate(episode)])))
        return stats
Example #21
def list_path(path,num,dry,bdstoken):
	logger.info("Listing path %s."%path)
	settings.DRY = dry
	header = {
	'Host':PAN_HOST,
	'Referer':DISK_HOME,
	}
	t = utils.tt()
	t2 = str(int(t) + 2)
	if path:
		_path = urllib.parse.urlencode({"dir":path})
	else:
		_path = urllib.parse.urlencode({"dir":'/'})
	url = PAN_INDEX + '/api/list?channel=chunlei&clienttype=0&web=1&num=' + \
	str(num) + '&t=' + t + '&page=1&' + _path + \
	'&showempty=0&order=time&desc=1&_='+ t2 +  \
	 '&bdstoken=' + bdstoken + "&app_id=250528"
	xml = fetch(url,{},utils.myname(),header,path)

	list_json = json.loads(xml.decode("utf-8"))
	if list_json:
		return list_json
	else:	
		return None
Example #22
def table_for_robustness(robustness_measure):
    global datasets
    data = df[df['robustness'] == robustness_measure]
    # each count should be 1
    # data.groupby(["metric", "dataset"])['value'].count()

    pivot = data.pivot_table(values="value", index="metric", columns="dataset", aggfunc="first") \
        .rename_axis(None)
    pivot.columns = pivot.columns.astype(list)
    pivot = pivot.reset_index()  # .rename({"index": }, axis=1)

    column_format = "|p{40mm}|" + "|".join(
        "c" * (len(datasets))
        for experiment, datasets in experiment_datasets.items()) + "|"

    float_formatter = ffloat if robustness_measure != "RankInstability" else fffloat

    latex = pivot.to_latex(
        escape=False,
        index=False,
        # index_names=False,
        caption=robustness_measure + " of " + str(len(metrics)) +
        " metrics on " + str(len(datasets)) + " datasets (" + experiments_str +
        ")",
        label="tab:robustness-" + robustness_measure[4:].lower(),
        column_format=column_format,
        header=[small(bf(robustness_measure))] +
        [tiny(tt(col)) for col in datasets],
        formatters=[lambda v: small(v)] +
        [lambda v: small(ffloat(v))] * len(datasets),
    )
    latex = modify_tabular(latex,
                           in_table=False,
                           prefix="\\scalebox{1}{\n",
                           postfix="\n}")
    return latex
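
In this example and the later table-generation ones, tt (together with small, tiny and bf) is used as a LaTeX text-formatting helper rather than a tensor or timestamp utility. Plausible one-line definitions, assuming standard LaTeX commands (the originals are not included in the snippet):

def tt(s):
    return "\\texttt{%s}" % s   # typewriter font (assumed)

def bf(s):
    return "\\textbf{%s}" % s   # bold (assumed)

def small(s):
    return "{\\small %s}" % s   # smaller font size (assumed)

def tiny(s):
    return "{\\tiny %s}" % s    # smallest font size (assumed)
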
Example #23
def list_path(path, num, dry, bdstoken):
    logger.info("Listing path %s." % path)
    settings.DRY = dry
    header = {
        'Host': PAN_HOST,
        'Referer': DISK_HOME,
    }
    t = utils.tt()
    t2 = str(int(t) + 2)
    if path:
        _path = urllib.parse.urlencode({"dir": path})
    else:
        _path = urllib.parse.urlencode({"dir": '/'})
    url = PAN_INDEX + '/api/list?channel=chunlei&clienttype=0&web=1&num=' + \
    str(num) + '&t=' + t + '&page=1&' + _path + \
    '&showempty=0&order=time&desc=1&_='+ t2 +  \
     '&bdstoken=' + bdstoken + "&app_id=250528"
    xml = fetch(url, {}, utils.myname(), header, path)

    list_json = json.loads(xml.decode("utf-8"))
    if list_json:
        return list_json
    else:
        return None
Example #24
        else:
            return ("%.0f" % n) + " " + name + "s"

    return ", ".join(filter(None, [comp(days, "day"), comp(hours, "hour"), comp(minutes, "min")]))


df = pd.read_excel("perf_experiments.xlsx") \
    .rename({"experiment": "Experiment"}, axis=1)
df['Total CPU time'] = df['time_user'].apply(format_duration)
df['n_graphs_total'] = df['n_datasets'] * df['n_graphs']
df['Avg CPU time per graph'] = (df['time_user'] / df['n_graphs_total']).apply(format_duration)

cols = ['Experiment', 'Total CPU time', 'Avg CPU time per graph']
df = df[cols]

df['Experiment'] = df['Experiment'].apply(lambda e: tt(e))

# %%

with open("perf_experiments_table.tex", "w") as f:
    f.write("")
    latex = df.to_latex(
        index=False,
        escape=False,
        column_format="|l|r|r|",
        caption="CPU Computation time of the 3 experiments evaluated by \\graffs, run on the \\texttt{rio} computing cluster (see \\autoref{sec:computing_cluster}).\n"
                "\\textsl{Total CPU time} is the sum of all times of individual CPU cores spent on evaluating the experiment, "
                "and \\textsl{Avg CPU time per graph} is that divided by $(\\text{number of datasets}) \\times (\\text{number of graphs generated from each dataset})$.",
        label="tab:perf_experiments_table",
    )
    latex = modify_tabular(latex, prefix="\\scalebox{0.8}{\n", postfix="\n}")
Example #25
    def train(self):
        for i in range(self.n_episodes):
            state = self.env.reset()

            for step in range(self.time_steps):
                if self.render:
                    self.env.render()

                state = tt(state)
                action = self.actor(state).cpu().detach().numpy()

                noise = np.random.normal(0,
                                         0.1,
                                         size=self.env.action_space.shape[0])
                action = np.clip(action + noise, self.env.action_space.low[0],
                                 self.env.action_space.high[0])
                next_state, reward, done, _ = self.env.step(action)

                # Save step in memory
                self.replay_memory.append(state=state,
                                          action=action,
                                          reward=reward,
                                          next_state=next_state,
                                          done=done)

                res = {
                    'episodes': i + 1,
                    'states': state.tolist(),
                    'rewards': reward,
                    'steps': step + 1
                }

                # Start training, if batch size reached
                if len(self.replay_memory) < self.batch_size:
                    self.res = self.res.append([res])
                    continue

                # Sample batch from memory
                states, actions, rewards, next_states, dones = self.replay_memory.sample_batch(
                )

                # Critic loss
                q1, q2 = self.critic(states, actions)
                next_actions = self.actor_target(next_states)

                noise = tt(torch.Tensor(actions.cpu()).data.normal_(0, 0.2))
                noise = noise.clamp(-0.5, 0.5)
                next_actions = (next_actions + noise).clamp(
                    self.env.action_space.low[0],
                    self.env.action_space.high[0])
                # Get next state q values by Clipped Double Q-Learning
                q1_ns, q2_ns = self.critic_target(next_states,
                                                  next_actions.detach())
                q_ns = torch.min(q1_ns, q2_ns)
                td_target = rewards + self.gamma * q_ns

                loss_critic = self.critic_loss_fct(
                    q1, td_target) + self.critic_loss_fct(q2, td_target)
                res['critic_losses'] = float(loss_critic)

                # Optimize critic
                self.critic_optim.zero_grad()
                loss_critic.backward()
                self.critic_optim.step()

                # Delayed Policy Updates
                if step % self.pi_update_steps == 0:
                    q1, _ = self.critic(states, self.actor(states))
                    # Actor loss
                    loss_actor = -q1.mean()
                    res['actor_losses'] = float(loss_actor)

                    # Optimize actor
                    self.actor_optim.zero_grad()
                    loss_actor.backward()
                    self.actor_optim.step()

                    # update target networks
                    for param, target_param in zip(
                            self.critic.parameters(),
                            self.critic_target.parameters()):
                        target_param.data.copy_(self.tau * param.data +
                                                (1 - self.tau) *
                                                target_param.data)

                    for param, target_param in zip(
                            self.actor.parameters(),
                            self.actor_target.parameters()):
                        target_param.data.copy_(self.tau * param.data +
                                                (1 - self.tau) *
                                                target_param.data)

                self.res = self.res.append([res])
                state = next_state
                if done:
                    break

            logging.info(f'Episode {i + 1}:')
            logging.info(
                f'\t Steps: {self.res.loc[self.res["episodes"] == i + 1]["steps"].max()}'
            )
            logging.info(
                f'\t Reward: {self.res.loc[self.res["episodes"] == i + 1]["rewards"].sum()}'
            )

        self.env.close()
        return self.res
Example #26
def add_task(bdstoken, t_url, save_path, dia):
    header = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': PAN_HOST,
        'Referer': DISK_HOME,
    }
    url = CLOUD_DL + '?bdstoken=' + bdstoken + '&channel=chunlei&clienttype=0&web=1'
    post = {
        'method': 'add_task',
        'app_id': '250528',
        'source_url': t_url,
        'save_path': save_path,
        'type': '3',
    }
    xml = fetch(url, post, utils.myname(), header, save_path)
    j = json.loads(xml.decode("utf-8"))
    logger.debug("json: %s " % str(j))

    if 'error_code' in list(j.keys()):
        logger.info(j['error_msg'])
        if j['error_code'] != 36022:
            while 'vcode' in list(j.keys()):
                vcode = j['vcode']
                logger.info(vcode)
                imgurl = j['img']
                #f=open(vimg,"wb")
                #fp = fetch(imgurl,{},"Input Vcode")
                #f.write(fp)
                #f.close()
                #try:
                #	subprocess.Popen(['xdg-open', vimg])
                #except:
                #	print("please open file %s to check the vcode."%vimg)
                #mag = re.search('(&.*$)',t_url).group(1)
                #task_name = dict(urllib.parse.parse_qsl(mag))['dn']

                #logger.info("Please input vcode for task: %s ."%(task_name))
                vd = VcodeDialog(dia, imgurl)
                vd.new_url(imgurl)
                response = vd.run()
                print(response)
                if response == 22:
                    print("The OK button was clicked")
                    vf = vd.get_user_input()
                    vd.destroy()
                elif response == Gtk.ResponseType.DELETE_EVENT:
                    vd.destroy()
                #input("verification code # ").strip()

                add = {
                    'file_sha1': '',
                    'selected_idx': '1,2,3,4',
                    'task_from': '0',
                    't': utils.tt(),
                    'type': 4,
                    'input': vf,
                    'vcode': vcode,
                }
                print(add)
                post.update(add)
                xml = fetch(url, post, "TryWithVcode", header, save_path)
                j = json.loads(xml.decode("utf-8"))
                logger.debug("json: %s " % str(j))
                if 'error_code' in list(j.keys()):
                    logger.info(j['error_msg'])
            return j
        else:

            return j['error_msg']
    logger.debug("json: %s " % str(j))

    return j
Example #27
 def _train_baseline(self, G, baseline):
     self._V_optimizer.zero_grad()
     loss = self._loss_function(tt(np.array([G])), baseline)
     loss.backward(retain_graph=True)
     self._V_optimizer.step()
Example #28
def add_task(bdstoken,t_url,save_path,dia):
	header = {
	'Content-Type':'application/x-www-form-urlencoded',
	'Host':PAN_HOST,
	'Referer':DISK_HOME,
	}
	url = CLOUD_DL + '?bdstoken=' + bdstoken + '&channel=chunlei&clienttype=0&web=1'
	post = {
	'method':'add_task',
	'app_id':'250528',
	'source_url':t_url,
	'save_path':save_path,
	'type':'3',
	}
	xml = fetch(url,post,utils.myname(),header,save_path)
	j = json.loads(xml.decode("utf-8"))
	logger.debug("json: %s "% str(j))
	
	if 'error_code' in list(j.keys()):
		logger.info(j['error_msg'])
		if j['error_code'] != 36022 :
			while 'vcode' in list(j.keys()):
				vcode = j['vcode']
				logger.info(vcode)
				imgurl = j['img']
				#f=open(vimg,"wb")
				#fp = fetch(imgurl,{},"Input Vcode")
				#f.write(fp)
				#f.close()
				#try:
				#	subprocess.Popen(['xdg-open', vimg])
				#except:
				#	print("please open file %s to check the vcode."%vimg)
				#mag = re.search('(&.*$)',t_url).group(1)
				#task_name = dict(urllib.parse.parse_qsl(mag))['dn']

				#logger.info("Please input vcode for task: %s ."%(task_name))
				vd = VcodeDialog(dia,imgurl)
				vd.new_url(imgurl)
				response = vd.run()
				print(response)
				if response == 22:
					print("The OK button was clicked")
					vf = vd.get_user_input()
					vd.destroy()
				elif  response == Gtk.ResponseType.DELETE_EVENT:
					vd.destroy()
				#input("verification code # ").strip()
				
				add = {
				'file_sha1':'',
				'selected_idx':'1,2,3,4',
				'task_from':'0',
				't':utils.tt(),
				'type':4,
				'input':vf,
				'vcode':vcode,
				}
				print(add)
				post.update(add)
				xml = fetch(url,post,"TryWithVcode",header,save_path)
				j = json.loads(xml.decode("utf-8"))
				logger.debug("json: %s "% str(j))
				if 'error_code' in list(j.keys()):
					logger.info(j['error_msg'])
			return j
		else:
			
			return j['error_msg']
	logger.debug("json: %s "% str(j))

	return j
Example #29
 def predict(self, X):
     output = self.net(tt(X)).detach()
     return output
Example #30
 def get_action(self, x, epsilon):
     u = np.argmax(self._q(tt(x)).cpu().detach().numpy())
     r = np.random.uniform()
     if r < epsilon:
         return np.random.randint(self._action_dim)
     return u
Example #31
    def train(self):
        for i in range(self.n_episodes):
            steps = 0
            state = self.env.reset()

            for step in range(self.time_steps):
                if self.render:
                    self.env.render()

                state = tt(state)
                action = self.actor(state).detach().numpy()

                # Exploration
                p = np.random.random()
                if p < self.eps:
                    action = np.random.uniform(low=-1, high=1, size=(1, ))
                # Do one step in env
                next_state, reward, done, _ = self.env.step(action)

                res = {
                    'episodes': i + 1,
                    'states': state.tolist(),
                    'rewards': reward,
                    'steps': step + 1
                }

                # Save step in memory
                self.replay_memory.append(state=state,
                                          action=action,
                                          reward=reward,
                                          next_state=next_state,
                                          done=done)

                # Start training, if batch size reached
                if len(self.replay_memory) < self.batch_size:
                    continue

                # Sample batch from memory
                states, actions, rewards, next_states, dones = self.replay_memory.sample_batch(
                )

                # Critic loss
                q_values = self.critic(states, actions)
                next_actions = self.actor_target(next_states)
                q_values_ns = self.critic_target(next_states,
                                                 next_actions.detach())
                td_target = rewards + self.gamma * q_values_ns
                loss_critic = self.critic_loss_fct(q_values, td_target)

                # Actor loss
                loss_actor = -(self.critic(states, self.actor(states)).mean())

                # Optimize actor
                self.actor_optim.zero_grad()
                loss_actor.backward()
                self.actor_optim.step()

                # Optimize critic
                self.critic_optim.zero_grad()
                loss_critic.backward()
                self.critic_optim.step()

                # update target networks
                for target_param, param in zip(self.actor_target.parameters(),
                                               self.actor.parameters()):
                    target_param.data.copy_(param.data * self.tau +
                                            target_param.data *
                                            (1.0 - self.tau))

                for target_param, param in zip(self.critic_target.parameters(),
                                               self.critic.parameters()):
                    target_param.data.copy_(param.data * self.tau +
                                            target_param.data *
                                            (1.0 - self.tau))

                self.res = self.res.append([res])

                state = next_state
                steps += 1

                if done:
                    break

            logging.info(f'Episode {i + 1}:')
            logging.info(
                f'\t Steps: {self.res.loc[self.res["episodes"] == i + 1]["steps"].max()}'
            )
            logging.info(
                f'\t Reward: {self.res.loc[self.res["episodes"] == i + 1]["rewards"].sum()}'
            )

        self.env.close()
        return self.res
Example #32
    def train(self,
              env,
              episodes,
              time_steps,
              initial_state=None,
              initial_noise=0.5):

        stats = EpisodeStats(episode_lengths=np.zeros(episodes),
                             episode_rewards=np.zeros(episodes),
                             episode_loss=np.zeros(episodes))

        self._run += 1

        for e in range(episodes):
            # Generate an episode.
            # An episode is an array of (state, action, reward) tuples
            episode = []
            s = env.reset(initial_state=initial_state,
                          noise_amplitude=initial_noise)

            total_r = 0
            for t in range(time_steps):
                a = self._get_action(s)
                ns, r, d, _ = env.step(tn(self._action_fun.act2env(a)))

                stats.episode_rewards[e] += r
                stats.episode_lengths[e] = t

                episode.append((s, a, r))

                total_r += r

                if d:
                    break
                s = ns

            gamma_t = 1
            for t in range(len(episode)):
                # Find the first occurrence of the state in the episode
                s, a, r = episode[t]

                g = 0
                gamma_kt = 1
                for k in range(t, len(episode)):
                    gamma_kt = gamma_kt * self._gamma
                    _, _, r_k = episode[k]
                    g = g + (gamma_kt * r_k)

                g = float(g)

                p = self._pi(s, a)

                # For Numerical Stability, in order to not get probabilities higher than one (e.g. delta distribution)
                # and to not return a probability equal to 0 because of the log in the score_function
                eps = 1e-8
                p = p.clamp(eps, 1)

                log_p = torch.log(p)

                gamma_t = gamma_t * self._gamma

                if self._baseline:
                    bl = self.baseline_fun(s)
                    delta = g - bl

                    bl_loss = self._bl_loss_function(self.baseline_fun(s),
                                                     tt([g]))

                    self._bl_optimizer.zero_grad()
                    bl_loss.backward()
                    self._bl_optimizer.step()

                    score_fun = torch.mean(-(gamma_t * delta) * log_p)
                else:
                    score_fun = torch.mean(-(gamma_t * g) * log_p)

                stats.episode_loss[e] += score_fun.item()

                self._pi_optimizer.zero_grad()
                score_fun.backward()
                self._pi_optimizer.step()

            pr_stats = {
                'run': self._run,
                'steps': int(stats.episode_lengths[e] + 1),
                'episode': e + 1,
                'episodes': episodes,
                'reward': stats.episode_rewards[e],
                'loss': stats.episode_loss[e]
            }
            print_stats(pr_stats)

        return stats