Example #1
 def __init__(self, max_population_size, possible_actions=[], histlen=42):
     self.name = "XCS_ER"
     self.action_size = len(possible_actions)
     self.max_population_size = max_population_size
     self.possible_actions = possible_actions
     self.population = []
     self.time_stamp = 1
     self.action_history = []
     self.old_action_history = []
     self.reinforce = Reinforcement()
     self.ga = CIGeneticAlgorithm(possible_actions)
     #################################
     self.single_testcases = True
     self.histlen = histlen
     #################################
     self.rewards = None
     self.p_explore = 0.25
     self.train_mode = True
     #################################
     # dumb idea that will never work
     #################################
     self.experience_length = 12000
     self.experience_batch_size = 2000
     self.experience = XCSExperienceReplay(
         max_memory=self.experience_length)
     self.ci_cycle = 0
Example #2
    def bundle_comparison(w_arr, L, shape, scale, E):
        '''bundle (Weibull fibers) response for comparison with the CB model'''
        from scipy.stats import weibull_min
        sV0 = scale * (3.14159 * 0.00345**2 * L)**(1 / shape)
        eps = w_arr / L * (1. - weibull_min(shape, scale=scale).cdf(w_arr / L))
        plt.plot(w_arr / L,
                 eps * E,
                 lw=4,
                 color='red',
                 ls='dashed',
                 label='FB model')

        bundle = Reinforcement(r=0.00345,
                               tau=0.00001,
                               V_f=0.9999,
                               E_f=E,
                               xi=WeibullFibers(shape=shape, sV0=sV0),
                               n_int=50)
        ccb = CompositeCrackBridge(E_m=25e3,
                                   reinforcement_lst=[bundle],
                                   Ll=L / 2.,
                                   Lr=L / 2.)
        ccb_view.model = ccb
        eps = []
        for w in w_arr:
            ccb.w = w
            eps.append(ccb_view.sigma_c / E)
        plt.plot(w_arr / L,
                 np.array(eps) * E,
                 color='blue',
                 lw=2,
                 label='CB model')
        plt.legend(loc='best')
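
A hypothetical call for bundle_comparison, assuming the original script's module-level
setup (the numpy/matplotlib imports and the shared ccb_view instance); the crack-opening
range and parameters below are illustrative values, not taken from the source:

    w_arr = np.linspace(0.0, 0.5, 200)   # illustrative crack-opening range
    bundle_comparison(w_arr, L=100., shape=5., scale=0.02, E=72e3)
    plt.show()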
Example #3
    def analytical_comparison():
        '''for the case when tau is deterministic,
        there is an analytical solution.
        The differences are caused by the additional matrix
        stiffness due to broken fibers, which are in the CB model
        added to matrix stiffness. As the matrix E grows and the V_f
        decreases, the solutions tend to get closer'''
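        # Closed-form relations used below (read directly off crackbridge() and
        # w_omega(); a sketch, not an independent derivation). With damage omega:
        #   Kf = E_f*V_f*(1 - omega),  Km = E_m*(1 - V_f) + E_f*V_f*omega,  Kc = Kf + Km,
        #   T  = 2*tau*V_f*(1 - omega)/r.
        # crackbridge() evaluates eps = sqrt(Kc*T/(Km*Kf)) * sqrt(w) * (1 - omega), and
        # w_omega() inverts the Weibull CDF omega = 1 - exp(-(eps/scale)**shape) together
        # with eps = sqrt(Kc*T/(Km*Kf)) * sqrt(w) to obtain w for a prescribed omega.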
        tau, E_f, E_m, V_f = 0.1, 72e3, 25e3, 0.2
        r, shape, scale = 0.001, 5., 0.02
        # analytical solution for damage controlled test
        ctrl_damage = np.linspace(0.0, .99, 100)

        def crackbridge(w, tau, E_f, E_m, V_f, r, omega):
            Kf = E_f * V_f * (1 - omega)
            Km = E_m * (1 - V_f) + E_f * V_f * omega
            Kc = Kf + Km
            T = 2. * tau * V_f * (1. - omega) / r
            c = np.sqrt(Kc * T / Km / Kf)
            return c * np.sqrt(w) * (1 - omega)

        def w_omega(tau, E_f, E_m, V_f, r, omega, shape, scale):
            Kf = E_f * V_f * (1 - omega)
            Km = E_m * (1 - V_f) + E_f * V_f * omega
            Kc = Kf + Km
            T = 2. * tau * V_f * (1. - omega) / r
            return (-np.log(1. - omega)) ** (2. / shape) \
                    * scale ** 2 * Km * Kf / Kc / T

        w_lst = [
            w_omega(tau, E_f, E_m, V_f, r, omega, shape, scale)
            for omega in ctrl_damage
        ]
        epsf = crackbridge(np.array(w_lst), tau, E_f, E_m, V_f, r, ctrl_damage)
        plt.plot(np.array(w_lst),
                 epsf * E_f * V_f,
                 color='red',
                 lw=4,
                 ls='dashed',
                 label='analytical')

        reinf = Reinforcement(r=r,
                              tau=tau,
                              V_f=V_f,
                              E_f=E_f,
                              xi=RV('weibull_min', shape=shape, scale=scale),
                              n_int=20)

        ccb = CompositeCrackBridge(E_m=E_m,
                                   reinforcement_lst=[reinf],
                                   Ll=1000.,
                                   Lr=1000.)
        # attach the freshly built model to the shared view (assumed missing step,
        # mirroring bundle_comparison above)
        ccb_view.model = ccb

        stress = []
        w_arr = np.linspace(0.0, np.max(w_lst), 100)
        for w in w_arr:
            ccb_view.model.w = w
            stress.append(ccb_view.sigma_c)
        plt.plot(w_arr, stress, color='blue', lw=2, label='CB model')
        plt.legend(loc='best')
Example #4
 def __init__(self, max_population_size, possible_actions=[], histlen=42):
     self.name = "XCS"
     self.action_size = len(possible_actions)
     self.max_population_size = max_population_size
     self.possible_actions = possible_actions
     self.population = []
     self.time_stamp = 1
     self.action_history = []
     self.old_action_history = []
     self.reinforce = Reinforcement()
     self.ga = CIGeneticAlgorithm(possible_actions)
     #################################
     self.single_testcases = True
     self.histlen = histlen
     #################################
     # stuff for batch update
     self.max_prediction_sum = 0
     self.rewards = None
     self.p_explore = 0.25
     self.train_mode = True
Example #5
class XCS_ER:

    GAMMA = 0.71

    def __init__(self, max_population_size, possible_actions=[], histlen=42):
        self.name = "XCS_ER"
        self.action_size = len(possible_actions)
        self.max_population_size = max_population_size
        self.possible_actions = possible_actions
        self.population = []
        self.time_stamp = 1
        self.action_history = []
        self.old_action_history = []
        self.reinforce = Reinforcement()
        self.ga = CIGeneticAlgorithm(possible_actions)
        #################################
        self.single_testcases = True
        self.histlen = histlen
        #################################
        self.rewards = None
        self.p_explore = 0.25
        self.train_mode = True
        #################################
        # dumb idea that will never work
        #################################
        self.experience_length = 12000
        self.experience_batch_size = 2000
        self.experience = XCSExperienceReplay(
            max_memory=self.experience_length)
        self.ci_cycle = 0

    def get_action(self, state):
        '''
        :param state: state in Retects (in XCS terms, the situation).

        :return: an action
        '''
        theta_mna = len(self.possible_actions)
        matcher = CIMatching(theta_mna, self.possible_actions)
        match_set = matcher.get_match_set(self.population, state,
                                          self.time_stamp)
        self.p_explore = (self.p_explore - 0.1) * 0.99 + 0.1  # decay exploration rate towards 0.1
        action_selector = ActionSelection(self.possible_actions,
                                          self.p_explore)
        prediction_array = action_selector.get_prediction_array(match_set)
        action = action_selector.select_action(prediction_array,
                                               self.train_mode)
        self.action_history.append((state, action))
        return action

    def reward(self, new_rewards):
        try:
            x = float(new_rewards)
            new_rewards = [x] * len(self.action_history)
        except Exception as _:
            if len(new_rewards) < len(self.action_history):
                raise Exception('Too few rewards')
        for i in range(0, len(new_rewards)):
            reward = new_rewards[i]
            state, action = self.action_history[i]
            self.experience.remember((state, action, reward, self.ci_cycle))
        self.action_history = []
        self.ci_cycle += 1
        if self.ci_cycle == 2 or self.ci_cycle % 3 == 0:
            print("start ER")
            self.learn_from_experience()
            print("finish ER")
        print("finished CI cyle " + str(self.ci_cycle - 1))

    def get_average_prediction(self, cycle_id, on_policy=False):
        next_experiences = self.experience.get_get_exp_of_CI_cyle(cycle_id + 1)
        if next_experiences is None:
            return None
        prediction_sum = 0
        for old_experience in next_experiences:
            state, _, _, _ = old_experience
            theta_mna = len(self.possible_actions)
            matcher = CIMatching(theta_mna, self.possible_actions)
            match_set = matcher.get_match_set(self.population, state,
                                              self.time_stamp)
            action_selector = ActionSelection(self.possible_actions, 0)
            prediction_array = action_selector.get_prediction_array(match_set)
            action = action_selector.select_action(prediction_array,
                                                   self.train_mode)
            if on_policy:
                prediction_sum += prediction_array[action]
            else:
                # off-policy: take the best predicted payoff, not the best action key
                prediction_sum += max(prediction_array.values())
        return prediction_sum / len(next_experiences)

    def learn_from_experience(self):
        experiences = self.experience.get_batch(self.experience_batch_size,
                                                self.ci_cycle - 1)
        states, actions, rewards, ci_cyles = zip(*experiences)
        cycles_of_batch = set(ci_cyles)
        prediction_vals = {}
        for cycle_id in cycles_of_batch:
            prediction_vals[cycle_id] = self.get_average_prediction(
                cycle_id, False)
        print("retrieved prediction approx.")
        for i in range(0, len(rewards)):
            state = states[i]
            action = actions[i]
            reward = rewards[i]
            cycle = ci_cyles[i]
            if prediction_vals[cycle] is not None:
                discounted_reward = reward + XCS_ER.GAMMA * prediction_vals[
                    cycle]
                # match set
                theta_mna = len(self.possible_actions)
                # use covering?
                # len(self.possible_actions)
                matcher = CIMatching(theta_mna, self.possible_actions)
                match_set = matcher.get_match_set(self.population, state,
                                                  self.time_stamp)
                # action_set
                action_selector = ActionSelection(self.possible_actions,
                                                  self.p_explore)
                action_set = action_selector.get_action_set(match_set, action)
                if len(action_set) > 0:
                    # update classifiers
                    self.reinforce.reinforce(action_set, discounted_reward)
                    self.ga.perform_iteration(action_set, state,
                                              self.population, self.time_stamp)
                    self.time_stamp += 1
            if i % 10 == 0:
                print("finished " + str(i / len(rewards)) + " percent of ER")
        self.delete_from_population()

    def delete_from_population(self):
        '''
        Deletes as many classifiers as necessary until the population size is within the
        defined bounds.
        '''
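        # Roulette-wheel deletion, as implemented below: each classifier contributes a
        # deletion vote relative to the population's average fitness, a random choice
        # point within the total vote selects the victim, and macroclassifiers with
        # numerosity > 1 lose one copy instead of being removed outright.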
        total_numerosity = sum(
            list(map(lambda x: x.numerosity, self.population)))
        while len(self.population) > self.max_population_size:
            total_fitness = sum(list(map(lambda x: x.fitness,
                                         self.population)))
            avg_fitness = total_fitness / total_numerosity
            vote_sum = sum(
                list(
                    map(lambda x: x.deletion_vote(avg_fitness),
                        self.population)))
            choice_point = random.random() * vote_sum
            vote_sum = 0
            for classifier in self.population:
                vote_sum += classifier.deletion_vote(avg_fitness)
                if vote_sum > choice_point:
                    if classifier.numerosity > 1:
                        classifier.numerosity = classifier.numerosity - 1
                    else:
                        self.population.remove(classifier)

    def save(self, filename):
        """ Stores agent as pickled file """
        pickle.dump(self, open(filename + '.p', 'wb'), 2)

    @classmethod
    def load(cls, filename):
        return pickle.load(open(filename + '.p', 'rb'))
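
A minimal driver sketch for the XCS_ER agent above. The action set, state vectors and
reward are hypothetical placeholders; the agent's collaborators (CIMatching,
ActionSelection, XCSExperienceReplay, ...) are assumed to be importable from the
original project:

agent = XCS_ER(max_population_size=2000, possible_actions=[0, 1, 2])
for cycle in range(6):
    # one CI cycle: pick an action per test-case state, then reward the whole batch
    states = [[0.1 * i] * agent.histlen for i in range(10)]  # placeholder feature vectors
    actions = [agent.get_action(s) for s in states]
    agent.reward(1.0)  # a scalar reward is broadcast over the entire action history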
Example #6

from depend_CB_model import CompositeCrackBridge
from depend_CB_postprocessor import CompositeCrackBridgeView
from reinforcement import Reinforcement, WeibullFibers
from spirrid.rv import RV
from matplotlib import pyplot as plt
import numpy as np

if __name__ == '__main__':

    # AR-glass
    reinf1 = Reinforcement(
        r=0.001,  #RV('uniform', loc=0.012, scale=0.002),
        tau=0.1,  #RV('uniform', loc=.3, scale=.1),
        V_f=0.2,
        E_f=72e3,
        xi=RV('weibull_min', shape=5., scale=.02),
        n_int=50)

    # carbon
    reinf2 = Reinforcement(r=RV('uniform', loc=0.002, scale=0.002),
                           tau=RV('uniform', loc=.6, scale=.1),
                           V_f=0.05,
                           E_f=200e3,
                           xi=RV('weibull_min', shape=10., scale=.015),
                           n_int=15)

    # instance of CompCrackBridge with matrix E and BC
    model = CompositeCrackBridge(E_m=25e3,
                                 reinforcement_lst=[reinf1],
Example #7
File: settings.py Project: nipinghe/core
def setValues():
    """Default settings

  :Returns:
    - Default settings for the probabilistic models for degradation of concrete.
  """

    # Concrete settings
    concrete = Concrete('C25/30')

    concrete.setWCratio(0.4)
    # values: (0.3),0.4,(0.45),0.5

    concrete.setCuringPeriod(1)
    # values: 1,3,7,28

    concrete.setGrade(45)
    # values: 45,40,25,35

    # Reinforcement settings
    reinforcement = Reinforcement('S500')
    reinforcement.setYieldStress(500)
    # values: all

    reinforcement.setDiameter(16)
    # values: (8),10,16,27

    reinforcement.setBars(1)
    # values: all

    # Geometry settings
    geometrie = Geometrie('Beam')
    geometrie.setCover(30)  # values: all
    geometrie.setBeamWidth(350)
    geometrie.setBeamHeight(550)
    geometrie.setBeamLength(5000)

    # Environment settings
    environment = Environment()

    environment.setZone('Submerged')
    # values: 'Submerged','Tidal','Splash','Atmospheric'

    environment.setHumidity(80)
    # values: 50,65,80,95,100

    # for the simplified corrosion rate:
    # environment.setExposure('Wet-Dry')
    # values: 'Wet','Wet-Dry','Airborne sea water','Tidal'

    environment.setTemperature(20)
    # values: all

    environment.setShelter('Unsheltered')
    # 'Sheltered','Unsheltered'

    # Chloride
    chloride = Chloride(concrete, geometrie, environment)

    # Carbonation
    carbonation = Carbonation(concrete, geometrie, environment)

    # Propagation
    rate = Propagation(environment)

    # Corrosion
    pitting = Pitting(reinforcement, rate)
    # pitting.setDeltaTime(50)
    # values: all

    # Resistance
    resistance = Resistance(concrete, reinforcement, geometrie, rate, pitting)

    return concrete, reinforcement, geometrie, environment, chloride, carbonation, rate, pitting, resistance
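
A minimal usage sketch for the settings factory above (any downstream reliability
analysis that consumes these objects is assumed and not shown):

(concrete, reinforcement, geometrie, environment, chloride,
 carbonation, rate, pitting, resistance) = setValues()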
Example #8
def get_reward_max(smiles,
                   predictor,
                   threshold,
                   invalid_reward=1.0,
                   get_features=get_fp):
    mol, prop, nan_smiles = predictor.predict([smiles],
                                              get_features=get_features)
    if len(nan_smiles) == 1:
        return invalid_reward
    if prop[0] >= threshold:
        return 10.0
    else:
        return invalid_reward


RL_max = Reinforcement(my_generator_max, my_predictor, get_reward_max)

n_iterations = 60000
data_path = [
    '../data/egfr_actives.smi', '../data/egfr_enamine.smi',
    '../data/egfr_mixed.smi'
]
save_path = [
    '../checkpoints/generator/egfr_clf_rnn_primed',
    '../checkpoints/generator/egfr_clf_rnn_enamine_primed',
    '../checkpoints/generator/egfr_clf_rnn_mixed_primed'
]
for dpath, mpath in zip(data_path, save_path):
    print('Fine-tuning on %s...' % dpath)
    np.random.seed(42)
    torch.manual_seed(42)
def main(n_iterations=20,
         n_policy=10,
         n_policy_replay=15,
         batch_size=16,
         n_fine_tune=None,
         seed=None,
         replay_data_path='../data/gen_actives.smi',
         primed_path='../checkpoints/generator/checkpoint_batch_training',
         save_every=2,
         save_path=None):
    # guard against the default save_path=None
    if save_path is not None:
        save_path = os.path.splitext(save_path)[0]
        save_path = save_path.split('-')[0]
    if n_fine_tune is None:
        n_fine_tune = n_iterations

    # initialize RNG seeds for reproducibility
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    gen_data_path = '../data/chembl_22_clean_1576904_sorted_std_final.smi'
    tokens = [
        ' ', '<', '>', '#', '%', ')', '(', '+', '-', '/', '.', '1', '0', '3',
        '2', '5', '4', '7', '6', '9', '8', '=', 'a', '@', 'C', 'B', 'F', 'I',
        'H', 'O', 'N', 'P', 'S', '[', ']', '\\', 'c', 'e', 'i', 'l', 'o', 'n',
        'p', 's', 'r'
    ]
    global gen_data
    gen_data = GeneratorData(gen_data_path,
                             delimiter='\t',
                             cols_to_read=[0],
                             keep_header=True,
                             tokens=tokens)

    # Setting up the generative model
    hidden_size = 1500
    stack_width = 1500
    stack_depth = 200
    layer_type = 'GRU'
    optimizer = torch.optim.SGD
    lr = 0.0002
    generator = StackAugmentedRNN(input_size=gen_data.n_characters,
                                  hidden_size=hidden_size,
                                  output_size=gen_data.n_characters,
                                  layer_type=layer_type,
                                  n_layers=1,
                                  is_bidirectional=False,
                                  has_stack=True,
                                  stack_width=stack_width,
                                  stack_depth=stack_depth,
                                  use_cuda=use_cuda,
                                  optimizer_instance=optimizer,
                                  lr=lr)
    # Use a model pre-trained on active molecules
    generator.load_model(primed_path)

    # Setting up the predictor
    model_instance = RFC
    model_params = {'n_estimators': 250, 'n_jobs': 10}
    predictor = VanillaQSAR(model_instance=model_instance,
                            model_params=model_params,
                            model_type='classifier')
    predictor.load_model('../checkpoints/predictor/egfr_rfc')

    # Setting up the reinforcement model
    def get_reward(smiles,
                   predictor,
                   threshold,
                   invalid_reward=1.0,
                   get_features=get_fp):
        mol, prop, nan_smiles = predictor.predict([smiles],
                                                  get_features=get_features)
        if len(nan_smiles) == 1:
            return invalid_reward
        if prop[0] >= threshold:
            return 10.0
        else:
            return invalid_reward

    RL_model = Reinforcement(generator, predictor, get_reward)

    # Define replay update functions
    def update_threshold(cur_threshold,
                         prediction,
                         proportion=0.15,
                         step=0.05):
        if (prediction >= cur_threshold).mean() >= proportion:
            new_threshold = min(cur_threshold + step, 1.0)
            return new_threshold
        else:
            return cur_threshold

    def update_data(smiles, prediction, replay_data, fine_tune_data,
                    threshold):
        for i in range(len(prediction)):
            if prediction[i] >= max(threshold, 0.2):
                fine_tune_data.file.append('<' + smiles[i] + '>')
            if prediction[i] >= threshold:
                replay_data.append(smiles[i])
        return fine_tune_data, replay_data

    fine_tune_data = GeneratorData(replay_data_path,
                                   tokens=tokens,
                                   cols_to_read=[0],
                                   keep_header=True)
    replay_data = GeneratorData(replay_data_path,
                                tokens=tokens,
                                cols_to_read=[0],
                                keep_header=True)
    replay_data = [traj[1:-1] for traj in replay_data.file]

    rl_losses = []
    rewards = []
    n_to_generate = 200
    threshold = 0.05
    start = time.time()
    active_threshold = 0.75

    tmp = sys.stdout
    sys.stdout = sys.__stdout__
    smiles, predictions, gen_metrics = estimate_and_update(
        RL_model.generator,
        RL_model.predictor,
        1000,
        batch_size=batch_size,
        plot=False,
        threshold=active_threshold,
        return_metrics=True)
    sys.stdout = tmp
    mol_data = pd.DataFrame(dict(smiles=smiles, predictions=predictions))
    if save_path:
        save_path_ = save_path + '-0.smi'
        mol_data.to_csv(save_path_, index=False, header=False)

    #  log_path = save_path + '.log'
    #  with open(log_path, 'wt') as f:
    #      print('starting log', file=f)

    for i in range(n_iterations):
        print('%3.d Training on %d replay instances...' %
              (i + 1, len(replay_data)))
        print('Setting threshold to %f' % threshold)

        print('Policy gradient...')
        for j in trange(n_policy, desc=' %3.d Policy gradient...' % (i + 1)):
            cur_reward, cur_loss = RL_model.policy_gradient(
                gen_data, get_features=get_fp, threshold=threshold)

            rewards.append(simple_moving_average(rewards, cur_reward))
            rl_losses.append(simple_moving_average(rl_losses, cur_loss))
        print('Loss: %f' % rl_losses[-1])
        print('Reward: %f' % rewards[-1])
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot_counts=True,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)

        print('Policy gradient replay...')
        for j in trange(n_policy_replay,
                        desc='%3.d Policy gradient replay...' % (i + 1)):
            cur_reward, cur_loss = RL_model.policy_gradient(
                gen_data,
                get_features=get_fp,
                replay=True,
                replay_data=replay_data,
                threshold=threshold)
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)

        print('Fine tuning...')
        RL_model.fine_tune(data=fine_tune_data,
                           n_steps=n_fine_tune,
                           batch_size=batch_size,
                           print_every=10000)
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)
        print('')

        if (i + 1) % save_every == 0:
            # redirect output to keep valid log
            tmp = sys.stdout
            sys.stdout = sys.__stdout__
            smiles, predictions, gen_metrics = estimate_and_update(
                RL_model.generator,
                RL_model.predictor,
                1000,
                batch_size=batch_size,
                plot=False,
                threshold=active_threshold,
                return_metrics=True)
            mol_data = pd.DataFrame(
                dict(smiles=smiles, predictions=predictions))
            if save_path:
                save_path_ = save_path + '-%d.smi' % (i + 1)
                mol_data.to_csv(save_path_, index=False, header=False)
            sys.stdout = tmp

    duration = time.time() - start
    train_metrics = {}
    train_metrics['duration'] = duration
    mol_actives = mol_data[mol_data.predictions > active_threshold]
    egfr_data = pd.read_csv('../data/egfr_with_pubchem.csv')
    egfr_actives = egfr_data[egfr_data.predictions > active_threshold]
    mol_actives['molecules'] = mol_actives.smiles.apply(Chem.MolFromSmiles)
    egfr_actives['molecules'] = egfr_actives.smiles.apply(Chem.MolFromSmiles)
    lib_metrics = compare_libraries(mol_actives,
                                    egfr_actives,
                                    properties=['MolWt', 'MolLogP'],
                                    return_metrics=True,
                                    plot=False)
    # collate results of training
    results = {}
    results.update(train_metrics)
    results.update(gen_metrics)
    results.update(lib_metrics)

    params = dict(n_iterations=n_iterations,
                  n_policy=n_policy,
                  n_policy_replay=n_policy_replay,
                  n_fine_tune=n_fine_tune,
                  seed=seed,
                  replay_data_path=replay_data_path,
                  primed_path=primed_path)
    if save_path is not None:
        results['save_path'] = save_path_
    print('Metrics for %s:' % params)
    print(results)
Example #10
                print 'broyden2 does not converge fast enough: switched to fsolve for this step'
                damage = fsolve(self.damage_residuum,
                                0.2 * np.ones_like(self.sorted_depsf))
            print 'damage =', np.sum(damage) / len(
                damage), 'iteration time =', time.clock() - ff, 'sec'
        return damage


if __name__ == '__main__':
    from mathkit.mfn.mfn_line.mfn_line import MFnLineArray
    from matplotlib import pyplot as plt

    reinf1 = Reinforcement(
        r=0.00345,  #RV('uniform', loc=0.001, scale=0.005),
        tau=RV('uniform', loc=4., scale=2.),
        V_f=0.2,
        E_f=70e3,
        xi=RV('weibull_min', shape=5., scale=0.04),
        n_int=100,
        label='AR glass')

    reinf2 = Reinforcement(
        r=0.003,  #RV('uniform', loc=0.002, scale=0.002),
        tau=RV('uniform', loc=.3, scale=.05),
        V_f=0.1,
        E_f=200e3,
        xi=WeibullFibers(shape=5., scale=0.02),
        n_int=100,
        label='carbon')

    ccb = CompositeCrackBridgeLoop(E_m=25e3,
                                   reinforcement_lst=[reinf1, reinf2],
Example #11
    operation_selection_kinds = ["BREAKPOINTS", "FREE"]
    operation_selection_kind = operation_selection_kinds[int(sys.argv[2])]

    genome_problems = [
        rearrangements.Unsigned_Reversal, rearrangements.Transposition,
        rearrangements.Unsigned_RevTrans,
        rearrangements.Prefix_Unsigned_Reversal,
        rearrangements.Prefix_Transposition,
        rearrangements.Prefix_Unsigned_RevTrans
    ]
    genome_problem = genome_problems[int(sys.argv[3])]()

    easy_epoch = int(sys.argv[4])
    normal_epoch = int(sys.argv[5])

    reinforcement = Reinforcement()  ## Very important object

    m0 = int(sys.argv[6])
    m1 = int(sys.argv[7])

    player0 = models.select_player(m0, operation_selection_kind,
                                   permutation_size)
    player1 = models.select_player(m1, operation_selection_kind,
                                   permutation_size)

    if len(sys.argv) == 10:
        player0.model.load_weights(sys.argv[8])
        player1.model.load_weights(sys.argv[9])

    epoch = 0
    while epoch < easy_epoch:
Example #12
class XCS:

    GAMMA = 0.71

    def __init__(self, max_population_size, possible_actions=[], histlen=42):
        self.name = "XCS"
        self.action_size = len(possible_actions)
        self.max_population_size = max_population_size
        self.possible_actions = possible_actions
        self.population = []
        self.time_stamp = 1
        self.action_history = []
        self.old_action_history = []
        self.reinforce = Reinforcement()
        self.ga = CIGeneticAlgorithm(possible_actions)
        #################################
        self.single_testcases = True
        self.histlen = histlen
        #################################
        # stuff for batch update
        self.max_prediction_sum = 0
        self.rewards = None
        self.p_explore = 0.25
        self.train_mode = True

    def get_action(self, state):
        '''
        :param state: state in Retects (in XCS terms, the situation).

        :return: an action
        '''
        theta_mna = len(self.possible_actions)
        matcher = CIMatching(theta_mna, self.possible_actions)
        match_set = matcher.get_match_set(self.population, state, self.time_stamp)
        self.p_explore = (self.p_explore - 0.1) * 0.99 + 0.1  # decay exploration rate towards 0.1
        action_selector = ActionSelection(self.possible_actions, self.p_explore)
        prediction_array = action_selector.get_prediction_array(match_set)
        action = action_selector.select_action(prediction_array, self.train_mode)
        max_val = prediction_array[action] # on policy
        #max(prediction_array.keys(), key=(lambda k: prediction_array[k]))
        action_set = action_selector.get_action_set(match_set, action)
        self.max_prediction_sum += max_val
        self.action_history.append((state, action_set))
        return action

    def reward(self, new_rewards):
        try:
            x = float(new_rewards)
            new_rewards = [x] * len(self.action_history)
        except Exception as _:
            if len(new_rewards) < len(self.action_history):
                raise Exception('Too few rewards')
        old_rewards = self.rewards
        self.rewards = new_rewards
        if old_rewards is not None:
            avg_max_pred = self.max_prediction_sum / len(self.action_history)
            for i in range(0, len(old_rewards)):
                discounted_reward = old_rewards[i] + XCS.GAMMA * avg_max_pred
                old_sigma, old_action_set = self.old_action_history[i]
                self.reinforce.reinforce(old_action_set, discounted_reward)
                self.ga.perform_iteration(old_action_set, old_sigma, self.population, self.time_stamp)
                self.time_stamp += 1
        self.max_prediction_sum = 0
        self.old_action_history = self.action_history
        self.action_history = []
        self.delete_from_population()

    def delete_from_population(self):
        '''
        Deletes as many classifiers as necessary until the population size is within the
        defined bounds.
        '''
        total_numerosity = sum(list(map(lambda x: x.numerosity, self.population)))
        while len(self.population) > self.max_population_size:
            total_fitness = sum(list(map(lambda x: x.fitness, self.population)))
            avg_fitness = total_fitness / total_numerosity
            vote_sum = sum(list(map(lambda x: x.deletion_vote(avg_fitness), self.population)))
            choice_point = random.random() * vote_sum
            vote_sum = 0
            for classifier in self.population:
                vote_sum += classifier.deletion_vote(avg_fitness)
                if vote_sum > choice_point:
                    if classifier.numerosity > 1:
                        classifier.numerosity = classifier.numerosity - 1
                    else:
                        self.population.remove(classifier)

    def save(self, filename):
        """ Stores agent as pickled file """
        pickle.dump(self, open(filename + '.p', 'wb'), 2)

    @classmethod
    def load(cls, filename):
        return pickle.load(open(filename + '.p', 'rb'))
Example #13
File: main.py Project: zwtian666/ReLeaSE
                                 n_layers=1,
                                 optimizer='Adadelta',
                                 lr=lr)

if use_cuda:
    my_generator = my_generator.cuda()

#my_generator.load_model('/home/mariewelt/Notebooks/PyTorch/Model_checkpoints/generator/policy_gradient_egfr_max')
my_generator.load_model(
    '/home/mariewelt/Notebooks/PyTorch/Model_checkpoints/generator/checkpoint_lstm'
)

egfr_predictor = RandomForestQSAR(n_estimators=100, n_ensemble=5)
egfr_predictor.load_model('/home/mariewelt/Notebooks/PyTorch/data/RF/EGFR_RF')

RL = Reinforcement(my_generator, egfr_predictor)
replay = ReplayMemory(capacity=10000)

for i in range(len(egfr_data.smiles)):
    if egfr_data.binary_labels[i] == 1.0:
        replay.push(egfr_data.smiles[i])

generated = []
for _ in range(replay.capacity):
    generated.append(my_generator.evaluate(gen_data))

sanitized = sanitize_smiles(generated)

for sm in sanitized:
    if sm is not None:
        replay.push(sm)
Example #14
File: settings.py Project: hackl/core
def setValues():
  """Default settings

  :Returns:
    - Default settings for the probabilistic models for degradation of concrete.
  """

  # Concrete settings
  concrete = Concrete('C25/30')

  concrete.setWCratio(0.4)
  # values: (0.3),0.4,(0.45),0.5

  concrete.setCuringPeriod(1)
  # values: 1,3,7,28

  concrete.setGrade(45)
  # values: 45,40,25,35

  # Reinforcement settings
  reinforcement = Reinforcement('S500')
  reinforcement.setYieldStress(500)
  # values: all

  reinforcement.setDiameter(16)
  # values: (8),10,16,27

  reinforcement.setBars(1)
  # values: all

  # Geometry settings
  geometrie = Geometrie('Beam')
  geometrie.setCover(30) # values: all
  geometrie.setBeamWidth(350)
  geometrie.setBeamHeight(550)
  geometrie.setBeamLength(5000)

  # Environment settings
  environment = Environment()

  environment.setZone('Submerged')
  # values: 'Submerged','Tidal','Splash','Atmospheric'

  environment.setHumidity(80)
  # values: 50,65,80,95,100

  # for the simplified corrosion rate:
  # environment.setExposure('Wet-Dry')
  # values: 'Wet','Wet-Dry','Airborne sea water','Tidal'

  environment.setTemperature(20)
  # values: all

  environment.setShelter('Unsheltered')
  # 'Sheltered','Unsheltered'

  # Chloride
  chloride = Chloride(concrete,geometrie,environment)

  # Carbonation
  carbonation = Carbonation(concrete,geometrie,environment)

  # Propagation
  rate = Propagation(environment)

  # Corrosion
  pitting = Pitting(reinforcement,rate)
  # pitting.setDeltaTime(50)
  # values: all

  # Resistance
  resistance = Resistance(concrete,reinforcement,geometrie,rate,pitting)

  return concrete, reinforcement, geometrie, environment, chloride, carbonation, rate, pitting, resistance

print("done with reinforcement setup")

# plots the RL reward func.
x = np.linspace(-5, 12)
reward = lambda x: 11.0 if ((x > 1.0) and (x < 4.0)) else 1.0
plt.plot(x, [reward(i) for i in x])
plt.xlabel('logP value')
plt.ylabel('Reward value')
plt.title('Reward function for logP optimization')
plt.show()

# does the actual reinforcement
# Creates a Reinforcement object with the previous generator and predictor,
# except each generated SMILES is now scored through the reward function
RL_logp = Reinforcement(my_generator_max, my_predictor, get_reward_logp)

# Only the generator is affected by this, since we reuse the same predictor

rewards = []
rl_losses = []

print(n_iterations)
for i in range(n_iterations):
    for j in trange(n_policy, desc='Policy gradient...'):
        cur_reward, cur_loss = RL_logp.policy_gradient(gen_data)
        rewards.append(simple_moving_average(rewards, cur_reward)) 
        rl_losses.append(simple_moving_average(rl_losses, cur_loss))
    
    plt.plot(rewards)
    plt.xlabel('Training iteration')