Example #1
class LeafSet(object):
    __slots__ = ('peers', 'capacity')
    __passthru = {'get', 'clear', 'pop', 'popitem', 'peekitem', 'key'}
    __iters = {'keys', 'values', 'items'}

    def __init__(self, my_key, iterable=(), capacity=8):
        try:
            iterable = iterable.items()  # view object
        except AttributeError:
            pass
        tuple_itemgetter = Peer.distance(my_key, itemgetter(0))
        key_itemgetter = Peer.distance(my_key)
        self.capacity = capacity
        self.peers = SortedDict(key_itemgetter)
        if iterable:
            l = sorted(iterable, key=tuple_itemgetter)
            self.peers.update(islice(l, capacity))

    def clear(self):
        self.peers.clear()

    def prune(self):
        # Drop the farthest peers (greatest sort key) until we are back at capacity.
        extra = len(self) - self.capacity
        for _ in range(extra):
            self.peers.popitem()

    def update(self, iterable):
        try:
            iterable = iterable.items()  # view object
        except AttributeError:
            pass
        iterable = iter(iterable)
        items = tuple(islice(iterable, 500))
        while items:
            self.peers.update(items)
            items = tuple(islice(iterable, 500))
        self.prune()  # keep the capacity invariant after a bulk update

    def setdefault(self, *args, **kwargs):
        value = self.peers.setdefault(*args, **kwargs)
        self.prune()
        return value

    def __setitem__(self, *args, **kwargs):
        self.peers.__setitem__(*args, **kwargs)
        self.prune()

    def __getitem__(self, *args, **kwargs):
        return self.peers.__getitem__(*args, **kwargs)

    def __delitem__(self, *args, **kwargs):
        return self.peers.__delitem__(*args, **kwargs)

    def __iter__(self, *args, **kwargs):
        return self.peers.__iter__(*args, **kwargs)

    def __reversed__(self, *args, **kwargs):
        return self.peers.__reversed__(*args, **kwargs)

    def __contains__(self, *args, **kwargs):
        return self.peers.__contains__(*args, **kwargs)

    def __len__(self, *args, **kwargs):
        return self.peers.__len__(*args, **kwargs)

    def __getattr__(self, key):
        if key in self.__class__.__passthru:
            return getattr(self.peers, key)
        elif key in self.__class__.__iters:
            return getattr(self.peers, 'iter' + key)
        else:
            raise AttributeError(key)

    def __repr__(self):
        return '<%s keys=%r capacity=%d/%d>' % (
            self.__class__.__name__, list(self), len(self), self.capacity)
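
A minimal usage sketch for LeafSet (not part of the original example). It assumes the imports the class relies on (SortedDict from sortedcontainers, islice, itemgetter) and stubs Peer.distance, which is not shown above, with a plain absolute-difference key function so the snippet can run on its own.

from itertools import islice
from operator import itemgetter

from sortedcontainers import SortedDict


class Peer:
    # Stand-in for the Peer class used by LeafSet (assumption): distance()
    # returns a sort-key function ordering keys by closeness to my_key.
    @staticmethod
    def distance(my_key, getter=None):
        if getter is None:
            return lambda key: abs(key - my_key)
        return lambda item: abs(getter(item) - my_key)


leaf_set = LeafSet(my_key=100, iterable={90: 'a', 250: 'b', 101: 'c'}, capacity=2)
leaf_set[99] = 'd'       # inserting past capacity triggers prune()
print(len(leaf_set))     # 2 -- only the peers closest to key 100 remain
print(list(leaf_set))    # keys ordered by distance from 100
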
Example #2
class RL:
    def __init__(self, sess, env, n_s, n_a, args):

        if sess is None:
            self.sess = tf.compat.v1.Session()
        else:
            self.sess = sess

        self.args = args
        self.env = env
        self.env.seed(self.args.seed)

        self.n_s = n_s
        self.n_a = n_a

        self.init = True  # True until the first batch of episodes is generated; random actions are used while True

        self.ite_count = 0  # number of completed training iterations

        self.dict = SortedDict()  # replay buffer keyed by episode return

        self.release = 10  # save a checkpoint every `release` evaluations
        self.reward_ = 200  # upper bound used to clip the desired reward

        self.save_index = 0  # counter incremented each time results are saved

        tf.compat.v1.random.set_random_seed(args.seed)  # set the graph-level seed before building the graph
        self.network_model()
        self.saver = tf.compat.v1.train.Saver()

    def network_model(self):
        def init_weights(input_):
            x = 1 / (np.sqrt(input_))
            return tf.compat.v1.random_uniform_initializer(-x,
                                                           x,
                                                           seed=self.args.seed)

        def behavior_build_network():
            w_init = tf.compat.v1.initializers.variance_scaling(
                scale=np.sqrt(2 / (1 + np.sqrt(5)**2)),
                distribution='uniform',
                seed=self.args.seed)

            l1 = tf.compat.v1.layers.dense(
                inputs=self.input_,
                units=self.args.hidden_units,
                kernel_initializer=w_init,
                bias_initializer=init_weights(self.n_s),
                activation=tf.nn.sigmoid,
                name='l1',
            )
            c1 = tf.compat.v1.layers.dense(
                inputs=tf.concat((self.d_r, self.d_h), 1),
                units=self.args.hidden_units,
                kernel_initializer=w_init,
                bias_initializer=init_weights(self.args.hidden_units),
                activation=tf.nn.sigmoid,
                name='c_out')

            out_1 = tf.math.multiply(l1, c1)

            l2 = tf.compat.v1.layers.dense(
                inputs=out_1,
                units=self.n_a,
                activation=None,
                kernel_initializer=w_init,
                bias_initializer=init_weights(self.args.hidden_units),
                name='l2',
            )

            # b=tf.layers.dense(
            # 	inputs=l2,
            # 	units=self.n_a,
            # 	kernel_initializer=w_init,
            # 	bias_initializer=init_weights(self.args.hidden_units),
            # 	activation=None,
            # 	name='out'
            # 	)
            b = l2
            return b

        # all network inputs
        self.input_ = tf.compat.v1.placeholder(tf.float32, [None, self.n_s],
                                               'input_')

        self.c_in = tf.compat.v1.placeholder(tf.float32, [None, 2], 'c_in')
        self.d_h = tf.compat.v1.placeholder(tf.float32, [None, 1], 'd_h')
        self.d_r = tf.compat.v1.placeholder(tf.float32, [None, 1], 'd_r')

        self.a = tf.compat.v1.placeholder(tf.int32, [
            None,
        ], 'action')

        with tf.compat.v1.variable_scope('behavior_function'):
            self.b = behavior_build_network()
            self.b_softmax = tf.nn.softmax(self.b)
            self.a_out = tf.squeeze(
                tf.random.categorical(logits=self.b,
                                      num_samples=1,
                                      seed=self.args.seed))

        with tf.compat.v1.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.b,
                                                               labels=self.a))

        with tf.compat.v1.variable_scope('train'):
            self.train_op = tf.compat.v1.train.AdamOptimizer(
                self.args.lr).minimize(self.loss)

    def action_choice(self, s, c, dr, dh):
        s = np.asarray(s, dtype=np.float32).reshape((1, self.n_s))
        dr = np.asarray(dr).reshape((-1, 1))
        dh = np.asarray(dh).reshape((-1, 1))
        action = self.sess.run(self.a_out, {
            self.input_: s,
            self.d_r: dr,
            self.d_h: dh
        })
        return action

    def get_(self):
        if self.init:
            self.desire_r_init, self.desire_h_init = 0, 0
            return
        h = []
        r = []

        for _ in range(self.args.generate_per_single_training):
            episode = self.dict.popitem()  # pops the highest-return episode (greatest key)
            h.append(len(episode[1][0]))
            r.append(episode[0])

        selected_episode_len = np.mean(h)
        selected_episode_mean = np.random.uniform(low=np.mean(r),
                                                  high=np.mean(r) + np.std(r))
        self.desire_r_init, self.desire_h_init = selected_episode_mean, selected_episode_len

    def feed(self):
        self.get_()
        self.dict.clear()
        for _ in range(self.args.memory_thersold):
            state, action, reward, total_reward = self.play()
            self.dict[total_reward] = (state, action, reward)
        self.init = False

    def play(self):
        s = self.env.reset()
        if self.ite_count == 0:
            self.sess.run(tf.compat.v1.global_variables_initializer())

        state_list = []
        action_list = []
        reward_list = []

        reward_total = 0
        done = False

        desire_h = self.desire_h_init
        desire_r = self.desire_r_init

        while not done:
            c = np.asarray([desire_h, desire_r])

            if self.init:
                a = np.random.randint(self.n_a)
            else:
                a = self.action_choice(s, c, desire_r, desire_h)

            s_, r, done, _ = self.env.step(a)

            state_list.append(s)
            action_list.append(a)
            reward_list.append(r)
            reward_total += r

            desire_h = max(desire_h - 1, 1)
            desire_r = min(desire_r - r, self.reward_)

            s = s_

            if done:
                break
        return state_list, action_list, reward_list, reward_total

    def learn(self):
        if self.ite_count == 0:
            self.sess.run(tf.compat.v1.global_variables_initializer())

        memory_dic = dict(self.dict)
        dic_value = list(memory_dic.values())

        for _ in range(self.args.n_update_eposide):
            state = []
            dr = []
            dh = []
            true_a = []
            c = []
            indices = np.random.choice(
                len(dic_value), self.args.batch_size,
                replace=True)  # randomly sample which episodes to train on
            tran = [dic_value[i] for i in indices]
            random_index = [np.random.choice(len(e[0]) - 2, 1) for e in tran]
            for idx_, tran_ in zip(random_index, tran):
                state.append(tran_[0][idx_[0]])
                dr.append(np.sum(tran_[2][idx_[0]:]))
                dh.append(len(tran_[0]) - idx_[0])
                true_a.append(tran_[1][idx_[0]])
                c.append([np.sum(tran_[2][idx_[0]:]), len(tran_[0]) - idx_[0]])

            command_ = np.asarray(c, dtype=np.float32).reshape(-1, 2)
            s_t = np.asarray(state, dtype=np.float32)
            action = np.asarray([a_ for a_ in true_a])
            dr = np.asarray(dr, dtype=np.float32).reshape((-1, 1))
            dh = np.asarray(dh, dtype=np.float32).reshape((-1, 1))
            _, loss = self.sess.run(
                [self.train_op, self.loss], {
                    self.input_: s_t,
                    self.c_in: command_,
                    self.a: action,
                    self.d_r: dr,
                    self.d_h: dh
                })

    def eval(self, eval_ite):
        test_reward = []
        test_step = []
        for i in range(self.args.eval_step):
            _, _, r_list, total_reward = self.play()
            test_reward.append(total_reward)
            test_step.append(len(r_list))
        print('ite: {},   reward: {:.3f},'.format(eval_ite,
                                                  np.mean(test_reward)))
        return np.mean(test_reward)

    def train(self):
        self.feed()
        test = []
        print(
            '----------------using tensorflow with {} generate_step_per_single_training----------------'
            .format(self.args.generate_per_single_training))
        start_ = time.time()  # timer for the periodic save message below
        while True:
            self.learn()
            self.ite_count += 1
            self.feed()
            if (self.ite_count - 1) % self.args.eval_step_every_k_step == 0:
                score = self.eval(self.ite_count - 1)
                test.append(score)
                if len(test) % self.release == 0 or (self.ite_count - 1) == 0:
                    # 					self.saver.save(self.sess,r'C:\Users\USER\Desktop\Upside down\new folder\result\memory_thersold\tensorflow_model_{}_tensorflow_categorical_1.ckpt'.format(self.args.generate_per_single_training))
                    self.saver.save(
                        self.sess, self.args.save_path + '\\' +
                        'tensorflow_model_{}_tensorflow_categorical_1.ckpt'.
                        format(self.args.generate_per_single_training))
                    # 					np.save(r'C:\Users\USER\Desktop\Upside down\new folder\result\memory_thersold\tensorflow_reward_test_{}_{}__tensorflow_categorical_1.npy'.format(self.save_index,self.args.generate_per_single_training),test)
                    print('saved')
                    self.save_index += 1
                    test = []
                    print((time.time() - start_) / 60)
                    start_ = time.time()
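
A hedged sketch of how the RL class above might be driven (not part of the original code). The attribute names on args mirror those referenced in the class, including the original spellings memory_thersold and n_update_eposide; the concrete values and the LunarLander-v2 environment are illustrative assumptions. The class itself additionally expects numpy, time and sortedcontainers.SortedDict to be imported.

import argparse

import gym
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # the class builds a v1-style graph with placeholders

args = argparse.Namespace(
    seed=0,
    lr=1e-3,
    hidden_units=64,
    batch_size=256,
    memory_thersold=100,              # episodes generated per feed() call (original spelling)
    generate_per_single_training=25,  # top episodes popped when picking the next command
    n_update_eposide=100,             # gradient steps per learn() call (original spelling)
    eval_step=10,                     # evaluation episodes per eval() call
    eval_step_every_k_step=10,        # evaluate every k training iterations
    save_path='.',
)

env = gym.make('LunarLander-v2')      # any discrete-action gym environment works
agent = RL(sess=None, env=env,
           n_s=env.observation_space.shape[0],
           n_a=env.action_space.n,
           args=args)
agent.train()                         # loops until interrupted
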
Example #3
class UpsideDownRL(object):
    def __init__(self, env, args):
        super(UpsideDownRL, self).__init__()
        self.env = env
        self.args = args
        self.nb_actions = self.env.action_space.n
        self.state_space = self.env.observation_space.shape[0]

        # Use sorted dict to store experiences gathered.
        # This helps in fetching highest reward trajectories during exploratory stage.
        self.experience = SortedDict()
        self.B = BehaviorFunc(self.state_space, self.nb_actions, args).cuda()
        self.optimizer = optim.Adam(self.B.parameters(), lr=self.args.lr)
        self.use_random_actions = True  # True for the first training epoch.
        self.softmax = nn.Softmax(dim=-1)  # softmax over the action dimension
        # Used to clip rewards so that B does not get unrealistic expected reward inputs.
        self.lunar_lander_max_reward = 250

    # Generate an episode using given command inputs to the B function.
    def gen_episode(self, dr, dh):
        state = self.env.reset()
        episode_data = []
        states = []
        rewards = []
        actions = []
        total_reward = 0
        while True:
            action = self.select_action(state, dr, dh)
            next_state, reward, is_terminal, _ = self.env.step(action)
            if self.args.render:
                self.env.render()
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            total_reward += reward
            state = next_state
            dr = min(dr - reward, self.lunar_lander_max_reward)
            dh = max(dh - 1, 1)
            if is_terminal:
                break

        return total_reward, states, actions, rewards

    # Fetch the desired return and horizon from the best trajectories in the current replay buffer
    # to sample more trajectories using the latest behavior function.
    def fill_replay_buffer(self):
        dr, dh = self.get_desired_return_and_horizon()
        self.experience.clear()
        for i in range(self.args.replay_buffer_capacity):
            total_reward, states, actions, rewards = self.gen_episode(dr, dh)
            self.experience[total_reward] = (states, actions, rewards)

        if self.args.verbose:
            if self.use_random_actions:
                print("Filled replay buffer with random actions")
            else:
                print("Filled replay buffer using BehaviorFunc")
        self.use_random_actions = False

    def select_action(self, state, desired_return=None, desired_horizon=None):
        if self.use_random_actions:
            action = np.random.randint(self.nb_actions)
        else:
            action_prob = self.B(
                torch.from_numpy(state).cuda(),
                torch.from_numpy(np.array(desired_return,
                                          dtype=np.float32)).reshape(-1,
                                                                     1).cuda(),
                torch.from_numpy(
                    np.array(desired_horizon,
                             dtype=np.float32).reshape(-1, 1)).cuda())
            action_prob = self.softmax(action_prob)
            # create a categorical distribution over action probabilities
            dist = Categorical(action_prob)
            action = dist.sample().item()
        return action

    # Todo: don't popitem from the experience buffer since these best-performing trajectories can have huge impact on learning of B
    def get_desired_return_and_horizon(self):
        if self.use_random_actions:
            return 0, 0

        h = []
        r = []
        for i in range(self.args.explore_buffer_len):
            episode = self.experience.popitem()  # will return in sorted order
            h.append(len(episode[1][0]))
            r.append(episode[0])

        mean_horizon_len = np.mean(h)
        mean_reward = np.random.uniform(low=np.mean(r),
                                        high=np.mean(r) + np.std(r))
        return mean_reward, mean_horizon_len

    def trainBehaviorFunc(self):
        experience_dict = dict(self.experience)
        experience_values = list(experience_dict.values())
        for i in range(self.args.train_iter):
            state = []
            dr = []
            dh = []
            target = []
            indices = np.random.choice(len(experience_values),
                                       self.args.batch_size,
                                       replace=True)
            train_episodes = [experience_values[i] for i in indices]
            t1 = [np.random.choice(len(e[0]) - 2, 1) for e in train_episodes]

            for pair in zip(t1, train_episodes):
                state.append(pair[1][0][pair[0][0]])
                dr.append(np.sum(pair[1][2][pair[0][0]:]))
                dh.append(len(pair[1][0]) - pair[0][0])
                target.append(pair[1][1][pair[0][0]])

            self.optimizer.zero_grad()
            state = torch.from_numpy(np.array(state)).cuda()
            dr = torch.from_numpy(
                np.array(dr, dtype=np.float32).reshape(-1, 1)).cuda()
            dh = torch.from_numpy(
                np.array(dh, dtype=np.float32).reshape(-1, 1)).cuda()
            target = torch.from_numpy(np.array(target)).long().cuda()
            action_logits = self.B(state, dr, dh)
            loss = nn.CrossEntropyLoss()
            output = loss(action_logits, target).mean()
            output.backward()
            self.optimizer.step()

    # Evaluate the agent using the initial command input from the best topK performing trajectories.
    def evaluate(self):
        testing_rewards = []
        testing_steps = []
        dr, dh = self.get_desired_return_and_horizon()
        for i in range(self.args.evaluate_trials):
            total_reward, states, actions, rewards = self.gen_episode(dr, dh)
            testing_rewards.append(total_reward)
            testing_steps.append(len(rewards))

        print("Mean reward achieved : {}".format(np.mean(testing_rewards)))
        return np.mean(testing_rewards)

    def train(self):
        # Fill replay buffer with random actions for the first time.
        self.fill_replay_buffer()
        iterations = 0
        test_returns = []
        while True:
            # Train behavior function with trajectories stored in the replay buffer.
            self.trainBehaviorFunc()
            self.fill_replay_buffer()

            if iterations % self.args.eval_every_k_epoch == 0:
                test_returns.append(self.evaluate())
                torch.save(self.B.state_dict(),
                           os.path.join(self.args.save_path, "model.pkl"))
                np.save(os.path.join(self.args.save_path, "testing_rewards"),
                        test_returns)
            iterations += 1
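
A hedged sketch of driving UpsideDownRL (not from the original repository). It assumes a BehaviorFunc(state_space, nb_actions, args) torch module is defined alongside the class and that a CUDA device is available, since the class calls .cuda() throughout; the args fields mirror those referenced above, with illustrative values.

import argparse
import os

import gym

args = argparse.Namespace(
    lr=1e-3,
    batch_size=256,
    replay_buffer_capacity=100,   # episodes regenerated per outer iteration
    explore_buffer_len=25,        # top episodes popped to pick the next command
    train_iter=100,               # gradient steps per trainBehaviorFunc() call
    evaluate_trials=10,
    eval_every_k_epoch=10,
    render=False,
    verbose=True,
    save_path='./checkpoints',
)
os.makedirs(args.save_path, exist_ok=True)

env = gym.make('LunarLander-v2')  # the 250 reward clip above is tuned for LunarLander
agent = UpsideDownRL(env, args)
agent.train()                     # loops until interrupted
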
Example #4
class Tweet:
    def __init__(self, text=None):
        # Check for empty input first so the URL regex never runs on None.
        if not text:
            print('想好发啥再戳我😡😡😡')
        elif self.isstatusurl(text):
            self.__dict__ = TwitterUser().api.get_status(
                text.split('/')[-1]).__dict__
        elif isinstance(text, str) and os.path.isfile(text):
            self.photo_file = text
        else:
            self._raw = text
            self._raw_chars = len(self._raw)
            self.wash()
            self.divide()
            self.pool()

            urls = self.extractURL(self._raw)
            if urls:
                if len(urls) > 5:
                    raise TooManyHyperLinks
                else:
                    self.raw_urls = urls
                    logging.info('计算原文中 url 个数和字符数...')
                    self.raw_urls_chars = sum(
                        [len(url) for url in self.raw_urls])
                    logging.info('原文中{}个 url,共{}个字符'.format(
                        len(urls), self.raw_urls_chars))
                    logging.info('正在生成短链接...')
                    self.shorten()
                    logging.info('短链接搞定')
                    self.shortens_chars = sum(
                        [len(url) for url in self.shortens])
                    self.buildOffset()

            self.tailor()

    @classmethod
    def extractURL(cls, text):
        return re.findall(urlmarker.WEB_URL_REGEX, text)

    @classmethod
    def isurl(cls, text):
        url = cls.extractURL(text)
        return len(url) == 1 and len(set([text] + url)) == 1

    @classmethod
    def isstatusurl(cls, text):
        return cls.isurl(text) and 'twitter.com' in text and 'status' in text

    def wash(self):
        '''
        Collapse runs of whitespace: a whitespace character is kept only when
        the character after it is not whitespace.
        '''
        text = self._raw
        workspace = []
        for i in range(1, len(text)):
            if not text[i - 1].isspace():
                workspace.append(text[i - 1])
            elif not text[i].isspace():
                # keep a single separator before the next word
                workspace.append(text[i - 1])
        if not text[-1].isspace():
            self.washed = ''.join(workspace) + text[-1]
        else:
            self.washed = ''.join(workspace)
        self.washed_chars = len(self.washed)

    def shorten(self):
        try:
            logging.debug('切换到开发账号...')
            dev_user = TwitterUser()
            logging.debug('开发账号切换成功,username: ' + '******')  # username redacted in the source
            logging.debug('生成中间 tweet...')
            # update_status expects a single string, so join the urls first
            middle_tweet = dev_user.api.update_status(' '.join(self.raw_urls))
            logging.debug('获取短链...')
            self.shortens = [
                url['url'] for url in middle_tweet.entities['urls']
            ]
            logging.debug('一切正常,删除中间 tweet...')
            middle_tweet.destroy()
            logging.debug('中间 tweet 已被删除...')
        except Exception as e:
            print('something wrong with shorten(), ' + str(e))

    def divide(self):
        if fmod(self.washed_chars, 280) != 0:
            self.n_pages = floor(self.washed_chars / 280) + 1
        else:
            self.n_pages = floor(self.washed_chars / 280)

    def pool(self):
        # newline-separated parts; drop empty fragments
        self.nsp = [item for item in self.washed.split('\n') if item]
        self.pool = flatten([item.split(' ') for item in self.nsp])

    def buildOffset(self):
        self.offset = SortedDict({0: self.pool[0]})
        for i in range(1, len(self.pool)):
            key = self.offset.keys()[i - 1] + len(self.pool[i - 1]) + 1
            self.offset[key] = self.pool[i]

    def buildPages(self):
        dividers = [n * 280 for n in range(1, self.n_pages)]
        indices = SortedList(
            set(dividers + [key for key in self.offset.keys()]))
        pages = SortedDict()
        for i in range(0, self.n_pages):
            # first page
            if i == 0:
                page = indices[0:indices.index(dividers[0]) - 1]
            # last page
            elif i == self.n_pages - 1:
                page = indices[indices.index(dividers[i - 1]) - 1:]
            else:
                page = indices[indices.index(dividers[i - 1]):
                               indices.index(dividers[i])]
            pages[i] = page

        # drop divider positions that do not correspond to a real word offset
        for key in pages:
            pages[key] = [item for item in pages[key] if item in self.offset]

        self.pagination = SortedDict()
        for i in range(self.n_pages):
            temp_key_list = [key for key in pages[i]]
            page_pool = []
            for key in temp_key_list:
                page_pool.append(self.offset[key])
            self.pagination[i] = ' '.join(page_pool)

    def tailor(self):
        if self.washed_chars > 280:
            # no urls in the text: paginate the washed text directly
            if not getattr(self, 'raw_urls', None):
                self.buildPages()
                self.tailored = self.pagination
            else:
                # still over 280 once the urls are replaced by short links
                if (self.washed_chars -
                        self.raw_urls_chars) + self.shortens_chars > 280:
                    self.buildPages()
                    self.tailored = self.pagination
                else:
                    self.tailored = self.washed
        # short enough to fit in a single tweet
        else:
            self.tailored = self.washed
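
A hedged sketch of the three ways the Tweet constructor above dispatches (not from the original project). TwitterUser, urlmarker, flatten and TooManyHyperLinks are assumed to come from the surrounding code base, and the file name and status URL used here are placeholders.

# Plain text: washed, measured and, if longer than 280 characters, paginated.
long_post = Tweet('word ' * 100)
print(long_post.washed_chars)
print(long_post.tailored)   # SortedDict of page index -> page text, or a single string

# A path to an existing local file is treated as a photo attachment.
photo_post = Tweet('holiday.jpg')   # hypothetical file name; must exist on disk
print(getattr(photo_post, 'photo_file', None))

# A tweet URL copies the referenced status's attributes via the Twitter API
# (requires valid credentials inside TwitterUser).
quoted = Tweet('https://twitter.com/jack/status/20')
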