Example #1
def PCA_on_training_model():
    file_list = interface.get_available_sha256()
    ex_list = np.array([
        pefeatures.PEFeatureExtractor().extract(interface.fetch_file(b))
        for b in file_list
    ])
    print("all_samples: ", ex_list.shape)
    # nor_list = normalize(ex_list, axis=0)
    # nor_list = MinMaxScaler().fit_transform(ex_list)

    nor_list, data_min, data_max, scale_, min_ = MinMaxImp(ex_list)

    pca = PCA(n_components=0.99).fit(nor_list)
    U, S, V = pca._fit(nor_list)  # note: _fit is a private sklearn API; it re-runs the SVD and returns its factors
    # dic_elements = {"n_component":pca.n_components_, "scale_":scale_, "min_":min_}
    dic_elements = {"n_component": pca.n_components_}
    np.save("pca_models/features.npy", ex_list)
    np.save("pca_models/nor_features.npy", nor_list)
    np.save("pca_models/U.npy", U)
    np.save("pca_models/S.npy", S)
    np.save("pca_models/V.npy", V)
    np.save("pca_models/scale.npy", scale_)
    np.save("pca_models/min.npy", min_)
    createDictCSV("pca_models/dic_elements.csv", dic_elements)
    print("reduced dimension: ", pca.n_components_)
    return ex_list, nor_list, U, S, V
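MinMaxImp() and createDictCSV() are project helpers that are not shown here. Judging by the values MinMaxImp() returns (the normalized matrix plus data_min, data_max, scale_ and min_), it appears to wrap sklearn's MinMaxScaler; a minimal sketch under that assumption:

from sklearn.preprocessing import MinMaxScaler

def MinMaxImp(X):
    # fit a MinMaxScaler and return the transformed data together with the fitted attributes
    scaler = MinMaxScaler().fit(X)
    return (scaler.transform(X), scaler.data_min_, scaler.data_max_,
            scaler.scale_, scaler.min_)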
Example #2
    def __init__(self,
                 sha256list,
                 random_sample=True,
                 maxturns=3,
                 output_path='evaded/blackbox/',
                 cache=False):
        self.cache = cache
        self.available_sha256 = sha256list
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        self.maxturns = maxturns
        self.feature_extractor = pefeatures.PEFeatureExtractor()
        self.random_sample = random_sample
        self.sample_iteration_index = 0
        self.output_path = os.path.join(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            output_path)
        if not os.path.exists(self.output_path):  # make sure the output directory exists
            os.makedirs(self.output_path)

        self.history = OrderedDict()

        self.samples = {}
        if self.cache:
            for sha256 in self.available_sha256:
                try:
                    self.samples[sha256] = interface.fetch_file(sha256)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # try a new sha256...this one can't be retrieved from storage

        self._reset()
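A minimal interaction sketch with this environment (assumptions: the class is registered with gym under the id 'malware-v0', as the other examples suggest, and the classic 4-tuple step API is in use):

import gym

env = gym.make('malware-v0')            # assumed registration id
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random action, for illustration only
    obs, reward, done, info = env.step(action)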
Example #3
def test_models(model, score_model, test_random=False):
    total = 200
    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(
            list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(
            misclassified)  # don't count misclassified towards success

    # option 1: Boltzmann sampling from Q-function network output
    softmax = lambda x: np.exp(x) / np.sum(np.exp(x))
    boltzmann_action = lambda x: np.argmax(
        np.random.multinomial(1,
                              softmax(x).flatten()))
    # option 2: maximize the Q value, ignoring stochastic action space
    best_action = lambda x: np.argmax(x)

    fe = pefeatures.PEFeatureExtractor()

    def model_policy(model):
        shp = (1, ) + tuple(model.input_shape[1:])

        def f(bytez):
            # first, get features from bytez
            feats = fe.extract2(bytez)
            # feats = get_ob(bytez)
            q_values = model.predict(feats.reshape(shp))[0]
            action_index = best_action(q_values)  # alternative: boltzmann_action(q_values)
            return ACTION_LOOKUP[action_index]

        return f

    # compare to keras models with windowlength=1
    dqn = load_model(model)
    # dqn = load_model('models/dqn.h5')
    dqn_success, _ = evaluate(model_policy(dqn))

    dqn_score = load_model(score_model)
    # dqn_score = load_model('models/dqn_score.h5')
    score_success, _ = evaluate(model_policy(dqn_score))

    # let's compare scores
    if test_random:
        random_result = "random:{}({}/{})".format(
            len(random_success) / total, len(random_success), total)
    else:
        random_result = "random:untested"

    print(random_result)
    blackbox_result = "blackbox:{}({}/{})".format(
        len(dqn_success) / total, len(dqn_success), total)
    print(blackbox_result)
    score_result = "score:{}({}/{})".format(
        len(score_success) / total, len(score_success), total)
    print(score_result)
    return random_result, blackbox_result, score_result
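For reference, a self-contained toy illustration (not from the repo) of the two action-selection options defined above; the Q values are made up:

import numpy as np

q_values = np.array([0.1, 2.0, 0.5, -1.0])

# option 1: Boltzmann sampling -- stochastic, weighted by the softmax of the Q values
stable_softmax = lambda x: np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)))
sampled_action = int(np.argmax(np.random.multinomial(1, stable_softmax(q_values))))

# option 2: greedy -- always pick the largest Q value (index 1 here)
greedy_action = int(np.argmax(q_values))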
Example #4
def test_models(model, score_model, agent_method, test_result, test_random=True, test_score=True):
    total = len(sha256_holdout)

    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(misclassified)  # don't count misclassified towards success

        with open(test_result, 'a+') as f:
            random_result = "random: {}({}/{})\n".format(len(random_success) / total, len(random_success), total)
            f.write(random_result)
            f.write("==========================\n")

    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    # ddqn
    env = gym.make('malware-v0')
    agent = agent_method(env)
    model_list = get_model_dir_list(model)
    for mm in model_list:
        agent.load(mm)
        success, _ = evaluate(agent_policy(agent))
        blackbox_result = "black: {}({}/{})".format(len(success) / total, len(success), total)
        with open(test_result, 'a+') as f:
            # record the blackbox result for each model directory
            f.write("{}->{}\n".format(mm, blackbox_result))

    with open(test_result, 'a+') as f:
        f.write("==========================\n")

    # score
    if test_score:
        env_score = gym.make('malware-score-v0')
        agent_score = agent_method(env_score)
        score_model_list = get_model_dir_list(score_model)
        for smm in score_model_list:
            agent_score.load(smm)
            score_success, _ = evaluate(agent_policy(agent_score))
            score_result = "score: {}({}/{})".format(len(score_success) / total, len(score_success), total)
            with open(test_result, 'a+') as f:
                f.write("{}->{}\n".format(smm, score_result))
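get_model_dir_list() is not shown in these snippets. A plausible sketch (assumption) that lists the checkpoint sub-directories of a chainerrl output directory, sorted by training step:

import os

def get_model_dir_list(basedir):
    # chainerrl saves checkpoints in sub-directories named after the step count,
    # e.g. '1000', '2000', '30000_finish' (naming assumed here)
    dirs = [d for d in os.listdir(basedir) if d.split('_')[0].isdigit()]
    dirs.sort(key=lambda d: int(d.split('_')[0]))
    return [os.path.join(basedir, d) for d in dirs]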
Example #5
def test_model():

    T = 80  # total mutations allowed
    success = 0
    rn = dqeaf.RangeNormalize(-0.5, 0.5)
    fe = pefeatures.PEFeatureExtractor()
    episode = 0

    for file in onlyfiles:
        try:
            with open(os.path.join(input_folder, file), 'rb') as infile:
                bytez = infile.read()
        except IOError:
            raise FileRetrievalFailure("Unable to read file {} from {}".format(file, input_folder))
        state = fe.extract(bytez)
        state_norm = rn(state)
        episode = episode + 1
        state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(
            device)
        for mutation in range(1, T):

            actions = model.forward(state_norm)
            print(actions)

            action = torch.argmax(actions).item()
            action = ACTION_LOOKUP[action]
            bytez = manipulate.modify_without_breaking(bytez, [action])
            new_label = interface.get_score_local(bytez)
            print('episode : ' + str(episode))
            print('mutation : ' + str(mutation))
            print('test action : ' + str(action))
            print('new label : ' + str(new_label))
            state = fe.extract(bytez)
            state_norm = rn(state)
            state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(
                device)

            if (new_label < 0.90):
                with open(os.path.join(output_folder, file + '.exe'),
                          mode='wb') as file1:
                    file1.write(bytes(bytez))
                break
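dqeaf.RangeNormalize comes from the project's dqeaf module and is not shown. A minimal sketch (assumption) of a normalizer that rescales a feature vector into [-0.5, 0.5]:

import numpy as np

class RangeNormalize:
    def __init__(self, low, high):
        self.low, self.high = low, high

    def __call__(self, x):
        # rescale to [0, 1] first, then to [low, high]
        span = np.max(x) - np.min(x)
        unit = (x - np.min(x)) / (span + 1e-12)
        return (self.low + unit * (self.high - self.low)).astype(np.float32)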
Example #6
def tt_models(RL, test_result, test_random=True):
    total = len(sha256_holdout)

    # baseline: choose actions at random
    if test_random:
        random_action = lambda bytez: np.random.choice(
            list(manipulate.ACTION_TABLE.keys()))
        random_success, misclassified = evaluate(random_action)
        total = len(sha256_holdout) - len(
            misclassified)  # don't count misclassified towards success

        with open(test_result, 'a+') as f:
            random_result = "random: {}({}/{})\n".format(
                len(random_success) / total, len(random_success), total)
            f.write(random_result)
            f.write("===========\n")

    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    # ddqn
    # env = gym.make('malware-test-v0')
    print("black box")

    success, _ = evaluate(agent_policy(RL))
    blackbox_result = "black: {}({}/{})".format(
        len(success) / total, len(success), total)
    with open(test_result, 'a+') as f:
        # record the blackbox result for each model directory
        f.write("{}\n".format(blackbox_result))

    with open(test_result, 'a+') as f:
        f.write("==========================\n")
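evaluate() and sha256_holdout are defined elsewhere in the project. A rough sketch of the contract evaluate() appears to satisfy, judging by how it is called in these examples; the helper names and the mutation budget below are assumptions:

def evaluate(action_function):
    success, misclassified = [], []
    for sha256 in sha256_holdout:
        bytez = interface.fetch_file(sha256)
        if interface.get_label_local(bytez) == 0.0:
            misclassified.append(sha256)   # the model already misses this sample untouched
            continue
        for _ in range(10):                # assumed mutation budget
            action = action_function(bytez)
            bytez = manipulate.modify_without_breaking(bytez, [action])
            if interface.get_label_local(bytez) == 0.0:
                success.append(sha256)     # evasion achieved
                break
    return success, misclassified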
Example #7
def test_model():

    total_reward = 0
    F = 200  # total test files
    T = 80  # total mutations allowed
    ratio = F * 0.5  # succeed if at least half of the test files are evaded
    success = 0
    rn = RangeNormalize(-0.5, 0.5)
    fe = pefeatures.PEFeatureExtractor()

    for episode in range(1, F):
        state = env.reset()
        state_norm = rn(state)
        state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(
            device)
        for mutation in range(1, T):

            actions = current_model.forward(state_norm)
            print(actions)

            action = torch.argmax(actions).item()
            next_state, reward, done, _ = env.step(action)
            print('episode : ' + str(episode))
            print('mutation : ' + str(mutation))
            print('test action : ' + str(action))
            print('test reward : ' + str(reward))
            state = next_state
            state_norm = rn(state)
            state_norm = torch.from_numpy(state_norm).float().unsqueeze(0).to(
                device)

            if (done):
                success = success + 1
                break

        if success >= ratio:
            print('success : ' + str(success))
            return True

    print('success : ' + str(success))
    return False
Example #8
    def __init__(self, sha256list, random_sample=True, maxturns=3, output_path='evaded/blackbox/', cache=True,
                 test=False):
        # PCA section (commented out)
        # features, nor_features, U, S, V, scale_, min_, pca_component = self.load_PCA_model()
        # self.PCA_V = V
        # self.feature_scale_ = scale_
        # self.feature_min_ = min_
        # self.PCA_component = pca_component

        self.total_turn = 0
        self.episode = -1  # total number of training episodes so far
        self.cache = cache
        self.available_sha256 = sha256list
        self.action_space = spaces.Discrete(len(ACTION_LOOKUP))
        self.maxturns = maxturns
        self.feature_extractor = pefeatures.PEFeatureExtractor()
        self.random_sample = random_sample
        self.sample_iteration_index = 0
        self.test = test
        self.output_path = os.path.join(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(
                        os.path.abspath(__file__)))), output_path)
        if not os.path.exists(self.output_path):  # make sure the output directory exists
            os.makedirs(self.output_path)

        self.history = OrderedDict()
        self.current_reward = 0

        self.samples = {}
        if self.cache:
            for sha256 in self.available_sha256:
                try:
                    self.samples[sha256] = interface.fetch_file(sha256, self.test)
                except interface.FileRetrievalFailure:
                    print("failed fetching file")
                    continue  # try a new sha256...this one can't be retrieved from storage

        self._reset()
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--outdir', type=str, default='models')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--final-exploration-steps', type=int, default=10 ** 4)
    parser.add_argument('--start-epsilon', type=float, default=1.0)
    parser.add_argument('--end-epsilon', type=float, default=0.1)
    parser.add_argument('--load', type=str, default=None)
    parser.add_argument('--steps', type=int, default=30000)
    parser.add_argument('--prioritized-replay', action='store_false')
    parser.add_argument('--episodic-replay', action='store_true')
    parser.add_argument('--replay-start-size', type=int, default=1000)
    parser.add_argument('--target-update-interval', type=int, default=10 ** 2)
    parser.add_argument('--target-update-method', type=str, default='hard')
    parser.add_argument('--soft-update-tau', type=float, default=1e-2)
    parser.add_argument('--update-interval', type=int, default=1)
    parser.add_argument('--eval-n-runs', type=int, default=80)
    parser.add_argument('--eval-interval', type=int, default=1000)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--minibatch-size', type=int, default=None)
    parser.add_argument('--test-random', action='store_true')
    parser.add_argument('--rounds', type=int, default=3)
    args = parser.parse_args()

    class QFunction(chainer.Chain):
        def __init__(self, obs_size, n_actions, n_hidden_channels=None):
            super(QFunction, self).__init__()
            if n_hidden_channels is None:
                n_hidden_channels = net_layers
            net = []
            inpdim = obs_size
            for i, n_hid in enumerate(n_hidden_channels):
                net += [('l{}'.format(i), L.Linear(inpdim, n_hid))]
                # net += [('norm{}'.format(i), L.BatchNormalization(n_hid))]
                net += [('_act{}'.format(i), F.relu)]
                net += [('_dropout{}'.format(i), F.dropout)]
                inpdim = n_hid

            net += [('output', L.Linear(inpdim, n_actions))]

            with self.init_scope():
                for n in net:
                    if not n[0].startswith('_'):
                        setattr(self, n[0], n[1])

            self.forward = net

        def __call__(self, x, test=False):
            """
            Args:
                x (ndarray or chainer.Variable): An observation
                test (bool): a flag indicating whether it is in test mode
            """
            for n, f in self.forward:
                if not n.startswith('_'):
                    x = getattr(self, n)(x)
                elif n.startswith('_dropout'):
                    x = f(x, 0.1)
                else:
                    x = f(x)

            return chainerrl.action_value.DiscreteActionValue(x)

    # create the DDQN agent
    def create_ddqn_agent(env, args):
        obs_size = env.observation_space.shape[0]
        action_space = env.action_space
        n_actions = action_space.n

        # q_func = q_functions.FCStateQFunctionWithDiscreteAction(
        #     obs_size, n_actions,
        #     n_hidden_channels=args.n_hidden_channels,
        #     n_hidden_layers=args.n_hidden_layers)
        q_func = QFunction(obs_size, n_actions)
        if args.gpu:
            q_func.to_gpu(args.gpu)

        # Draw the computational graph and save it in the output directory.
        if not args.test and not args.gpu:
            chainerrl.misc.draw_computational_graph(
                [q_func(np.zeros_like(env.observation_space.low, dtype=np.float32)[None])],
                os.path.join(args.outdir, 'model'))

        # Use epsilon-greedy for exploration
        explorer = explorers.LinearDecayEpsilonGreedy(
            args.start_epsilon, args.end_epsilon, args.final_exploration_steps,
            action_space.sample)
        # explorer = explorers.Boltzmann()
        # explorer = explorers.ConstantEpsilonGreedy(
        #     epsilon=0.3, random_action_func=env.action_space.sample)

        opt = optimizers.Adam()
        opt.setup(q_func)

        rbuf_capacity = 5 * 10 ** 3
        if args.episodic_replay:
            if args.minibatch_size is None:
                args.minibatch_size = 4
            if args.prioritized_replay:
                betasteps = (args.steps - args.replay_start_size) // args.update_interval
                rbuf = replay_buffer.PrioritizedEpisodicReplayBuffer(rbuf_capacity, betasteps=betasteps)
            else:
                rbuf = replay_buffer.EpisodicReplayBuffer(rbuf_capacity)
        else:
            if args.minibatch_size is None:
                args.minibatch_size = 32
            if args.prioritized_replay:
                betasteps = (args.steps - args.replay_start_size) // args.update_interval
                rbuf = replay_buffer.PrioritizedReplayBuffer(rbuf_capacity, betasteps=betasteps)
            else:
                rbuf = replay_buffer.ReplayBuffer(rbuf_capacity)

        # Chainer only accepts numpy.float32 by default, so use a
        # converter as the feature extractor function phi.
        phi = lambda x: x.astype(np.float32, copy=False)

        agent = chainerrl.agents.DoubleDQN(q_func, opt, rbuf, gamma=args.gamma,
                                           explorer=explorer, replay_start_size=args.replay_start_size,
                                           target_update_interval=args.target_update_interval,
                                           update_interval=args.update_interval,
                                           phi=phi, minibatch_size=args.minibatch_size,
                                           target_update_method=args.target_update_method,
                                           soft_update_tau=args.soft_update_tau,
                                           episodic_update=args.episodic_replay,
                                           episodic_update_len=16)

        return agent

    # start training
    def train_agent(args, use_score=False):
        ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
        env = gym.make(ENV_NAME)
        ENV_TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(ENV_TEST_NAME)

        # np.random.seed(123)
        env.seed(123)
        # Set a random seed used in ChainerRL
        misc.set_random_seed(123)

        agent = create_ddqn_agent(env, args)

        q_hook = PlotHook('Average Q Value', ylabel='Average Action Value (Q)')
        loss_hook = PlotHook('Average Loss', plot_index=1, ylabel='Average Loss per Episode')
        reward_hook = PlotHook('Average Reward', plot_index=2, ylabel='Reward Value per Episode')
        scores_hook = TrainingScoresHook('scores.txt', args.outdir)

        chainerrl.experiments.train_agent_with_evaluation(
            agent, env,
            steps=args.steps,  # Train the agent for this many steps
            max_episode_len=env.maxturns,  # Maximum length of each episode
            eval_interval=args.eval_interval,  # Evaluate the agent every eval_interval steps
            eval_n_runs=args.eval_n_runs,  # Number of episodes sampled per evaluation
            outdir=args.outdir,  # Save everything to 'result' directory
            step_hooks=[q_hook, loss_hook, scores_hook, reward_hook],
            successful_score=7,
            eval_env=test_env
        )

        # make sure scores.txt is still written when training succeeds within a single round
        scores_hook(None, None, 1000)

        return env, agent

    # get the directory of the most recently saved model
    def get_latest_model_dir_from(basedir):
        dirs = os.listdir(basedir)
        lastmodel = -1
        for d in dirs:
            try:
                if int(d) > lastmodel:
                    lastmodel = int(d)
            except ValueError:
                continue

        assert lastmodel >= 0, "No saved models!"
        return os.path.join(basedir, str(lastmodel))

    # kerasrl
    # def generate_dense_model(input_shape, nb_actions):
    #     model = Sequential()
    #     model.add(Flatten(input_shape=input_shape))
    #     # normalize before compute
    #     model.add(BatchNormalization())
    #     model.add(Dropout(0.1))  # drop out the input to make model less sensitive to any 1 feature
    #
    #     for layer in net_layers:
    #         model.add(Dense(layer))
    #         model.add(ELU(alpha=1.0))
    #         model.add(Dropout(0.1))
    #
    #     model.add(Dense(nb_actions))
    #     model.add(Activation('linear'))
    #     print(model.summary())
    #
    #     return model
    #
    # def train_keras_dqn_model(args):
    #     ENV_NAME = 'malware-v0'
    #     env = gym.make(ENV_NAME)
    #     env.seed(123)
    #     nb_actions = env.action_space.n
    #     window_length = 1  # "experience" consists of where we were, where we are now
    #
    #     # generate a policy model
    #     model = generate_dense_model((window_length,) + env.observation_space.shape, nb_actions)
    #
    #     # configure and compile our agent
    #     # BoltzmannQPolicy selects an action stochastically with a probability generated by soft-maxing Q values
    #     policy = BoltzmannQPolicy()
    #
    #     # memory can help a model during training
    #     # for this, we only consider a single malware sample (window_length=1) for each "experience"
    #     memory = SequentialMemory(limit=1000, ignore_episode_boundaries=False, window_length=window_length)
    #
    #     # DQN agent as described in Mnih (2013) and Mnih (2015).
    #     # http://arxiv.org/pdf/1312.5602.pdf
    #     # http://arxiv.org/abs/1509.06461
    #     agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=16,
    #                      enable_double_dqn=True, enable_dueling_network=True, dueling_type='avg',
    #                      target_model_update=1e-2, policy=policy, batch_size=16)
    #
    #     # keras-rl allows one to use any built-in keras optimizer
    #     agent.compile(RMSprop(lr=1e-2), metrics=['mae'])
    #
    #     # play the game. learn something!
    #     agent.fit(env, nb_steps=args.steps, visualize=False, verbose=2)
    #
    #     history_test = None
    #
    #     if args.test:
    #         # Set up the testing environment
    #         TEST_NAME = 'malware-test-v0'
    #         test_env = gym.make(TEST_NAME)
    #
    #     # evaluate the agent on a few episodes, drawing randomly from the test samples
    #         agent.test(test_env, nb_episodes=100, visualize=False)
    #         history_test = test_env.history
    #
    #     return env, agent

    # train or test
    if not args.test:
        print("training...")

        # retrain the model several times in a row to avoid manual restarts
        for _ in range(args.rounds):
            args.outdir = experiments.prepare_output_dir(
                args, args.outdir, argv=sys.argv)
            print('Output files are saved in {}'.format(args.outdir))

            env, agent = train_agent(args)
            # env, agent = train_keras_dqn_model(args)

            with open(os.path.join(args.outdir, 'scores.txt'), 'a') as f:
                f.write(
                    "total_turn/episode->{}({}/{})\n".format(env.total_turn / env.episode, env.total_turn, env.episode))
                f.write("history:\n")

                count = 0
                success_count = 0
                for k, v in env.history.items():
                    count += 1
                    if v['evaded']:
                        success_count += 1
                        f.write("{}:{}->{}\n".format(count, k, v['evaded_sha256']))
                    else:
                        f.write("{}:{}->\n".format(count, k))

                f.write("success count:{}".format(success_count))
                f.write("{}".format(env.history))

            # mark success/failure in the output directory name
            dirs = os.listdir(args.outdir)
            second_line = linecache.getline(os.path.join(args.outdir, 'scores.txt'), 2)
            success_score = second_line.strip('\n').split('\t')[3]

            # if training finished early, mark it as a success
            success_flag = False
            for file in dirs:
                if file.endswith('_finish') and not file.startswith(str(args.steps)):
                    success_flag = True
                    break

            os.rename(args.outdir, '{}-{}{}'.format(args.outdir, success_score, '-success' if success_flag else ''))

            # reset outdir back to 'models'
            args.outdir = 'models'
    else:
        print("testing...")
        model_fold = os.path.join(args.outdir, args.load)
        scores_file = os.path.join(model_fold, 'scores.txt')

        # baseline: choose actions at random
        if args.test_random:
            random_action = lambda bytez: np.random.choice(list(manipulate.ACTION_TABLE.keys()))
            random_success, misclassified = evaluate(random_action)
            total = len(sha256_holdout) - len(misclassified)  # don't count misclassified towards success

            with open(scores_file, 'a') as f:
                random_result = "random: {}({}/{})\n".format(len(random_success) / total, len(random_success), total)
                f.write(random_result)
                f.write("==========================\n")

        total = len(sha256_holdout)
        fe = pefeatures.PEFeatureExtractor()

        def agent_policy(agent):
            def f(bytez):
                # first, get features from bytez
                feats = fe.extract2(bytez)
                action_index = agent.act(feats)
                return ACTION_LOOKUP[action_index]

            return f

        # ddqn
        env = gym.make('malware-test-v0')
        agent = create_ddqn_agent(env, args)
        mm = get_latest_model_dir_from(model_fold)
        agent.load(mm)
        success, _ = evaluate(agent_policy(agent))
        blackbox_result = "black: {}({}/{})".format(len(success) / total, len(success), total)
        with open(scores_file, 'a') as f:
            f.write("{}->{}\n".format(mm, blackbox_result))
Example #10
if __name__ == '__main__':
    # baseline: choose actions at random
    random_action = lambda bytez: np.random.choice(
        list(manipulate.ACTION_TABLE.keys()))
    random_success, misclassified = evaluate(random_action)
    total = len(sha256_holdout) - len(
        misclassified)  # don't count misclassified towards success

    ENV_NAME = 'malware-test-v0'
    env = gym.make(ENV_NAME)

    fe = pefeatures.PEFeatureExtractor()

    def agent_policy(agent):
        def f(bytez):
            # first, get features from bytez
            feats = fe.extract(bytez)
            action_index = agent.act(feats)
            return ACTION_LOOKUP[action_index]

        return f

    agent = create_acer_agent(env)
    # pull latest stored model
    last_model_dir = get_latest_model_from('models/acer_chainer')
    agent.load(last_model_dir)
    success, _ = evaluate(agent_policy(agent))
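create_acer_agent() and get_latest_model_from() belong to the surrounding project and are not shown; the latter presumably mirrors get_latest_model_dir_from() from Example #9, e.g. this sketch:

import os

def get_latest_model_from(basedir):
    # pick the numerically largest checkpoint directory (same logic as Example #9)
    lastmodel = -1
    for d in os.listdir(basedir):
        try:
            lastmodel = max(lastmodel, int(d))
        except ValueError:
            continue
    assert lastmodel >= 0, "No saved models!"
    return os.path.join(basedir, str(lastmodel))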
Example #11
def compute_observation(bytez, feature_min_, V, PCA_component, feature_scale_):
    fe = pefeatures.PEFeatureExtractor()
    raw_features = fe.extract(bytez)
    # scaled_features = scale_min_imp(raw_features, feature_scale_, feature_min_)
    observation = np.dot(raw_features[np.newaxis, :], V.T[:, :PCA_component])
    return observation
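A hypothetical usage example that reloads the arrays saved by PCA_on_training_model() in Example #1; the CSV layout and the sample path are assumptions:

import csv
import numpy as np

V = np.load("pca_models/V.npy")
scale_ = np.load("pca_models/scale.npy")
min_ = np.load("pca_models/min.npy")
with open("pca_models/dic_elements.csv") as f:
    row = next(csv.reader(f))              # assumed layout: "n_component,<value>"
    pca_component = int(float(row[1]))

with open("sample.exe", "rb") as f:         # hypothetical input file
    bytez = f.read()

obs = compute_observation(bytez, min_, V, pca_component, scale_)
print(obs.shape)                            # (1, pca_component)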