Exemplo n.º 1
0
# Load the run configuration and echo every entry so the settings used for
# this run appear in the output.
# NOTE(review): read_inp() is defined outside this view; it presumably returns
# a dict-like mapping of parameter names to string values -- confirm.
params = read_inp()
for i in params:
    print(i, params[i])

# NOTE(review): the eval() calls below execute the configuration text as
# Python. That allows entries written as expressions (e.g. 10**6), but it also
# runs arbitrary code if the configuration is not trusted. ast.literal_eval is
# not a drop-in replacement because it rejects expressions such as 10**6.
# The trailing "example:" values are the ones noted by the original author.
DEFAULT_ENV_NAME = params['DEFAULT_ENV_NAME']  # example: "1k43"
MEAN_REWARD_BOUND = eval(params['MEAN_REWARD_BOUND'])  # example: -3.0
RENDER = eval(params['RENDER'])  # example: 0

FCOUNTS = eval(params['FCOUNTS'])  # example: 10
BCOUNT = eval(params['BCOUNT'])  # example: -1
TRACK = eval(
    params['TRACK']
)  # example: 5; how many residue coordinates from the generated sequence to include

# Build the environment for the bundled 1k43 structure. The fourth positional
# argument is fixed at 0 here; its meaning is defined by environ_grid, which
# is not visible in this view.
env = environ_grid('1k43.pdb', DEFAULT_ENV_NAME, RENDER, 0, TRACK, FCOUNTS,
                   BCOUNT)

# Presumably the DQN training hyperparameters (their use is outside this view).
GAMMA = eval(params['GAMMA'])  # example: 0.99
BATCH_SIZE = eval(params['BATCH_SIZE'])  # example: 32
REPLAY_SIZE = eval(params['REPLAY_SIZE'])  # example: 10000
LEARNING_RATE = eval(params['LEARNING_RATE'])  # example: 1e-4
SYNC_TARGET_FRAMES = eval(params['SYNC_TARGET_FRAMES'])  # example: 1000
REPLAY_START_SIZE = eval(params['REPLAY_START_SIZE'])  # example: 10000

# Presumably the epsilon-greedy exploration schedule -- confirm against the
# training loop.
EPSILON_DECAY_LAST_FRAME = eval(params['EPSILON_DECAY_LAST_FRAME'])  # example: 10**6
EPSILON_START = eval(params['EPSILON_START'])  # example: 1.0
EPSILON_FINAL = eval(params['EPSILON_FINAL'])  # example: 0.05

MAX_ITER = eval(params['MAX_ITER'])  # example: 10**9

Experience = collections.namedtuple(
Exemplo n.º 2
0
Arquivo: dqn.py Projeto: Niraj288/PfRL
    #  we don't want to back-propagate through this calculation
    #  because it is just observations that we want to be true
    #  That is, we want to change the expected values output from 
    #  the net, not the observations calculation
    next_state_values = next_state_values.detach()

    # calc the Q function behavior we want
    expected_state_action_values = next_state_values * GAMMA + rewards_v
    
    # compare what we have to what we want
    return nn.MSELoss()(state_action_values, expected_state_action_values)


# Build the environment for the bundled 1k43 structure, print it, then read
# and print the observation and action dimensions it exposes.
DEFAULT_ENV_NAME = "Protein folding"

env = environ_grid('1k43.pdb', DEFAULT_ENV_NAME, 0)
print(env)

obs_size, n_actions = env.obs_size, env.n_actions
print(obs_size, n_actions)

class Net(nn.Module):
    """Torch module built from an observation size, a hidden size and an action count."""

    def __init__(self, obs_size, hidden_size, n_actions):
        super(Net, self).__init__()

        def init_weights(m):
            """Xavier-initialise the weight and zero the bias of a Linear layer.

            Modules of any other type are left untouched.
            """
            # Exact type match (not isinstance) is kept from the original, so
            # subclasses of nn.Linear are still skipped.
            if type(m) is nn.Linear:
                # xavier_uniform_ is the in-place initialiser; the un-suffixed
                # xavier_uniform is a deprecated alias of it.
                torch.nn.init.xavier_uniform_(m.weight)
                m.bias.data.fill_(0.0)
Exemplo n.º 3
0
# Evaluation setup: build the environment with rendering and the test flag
# switched on, then load the best saved network for DEFAULT_ENV_NAME.

# Structure file: sys.argv[2] when supplied, otherwise the bundled default.
pdb = sys.argv[2] if len(sys.argv) > 2 else '1k43.pdb'

RENDER = 1
test = 1

# NOTE(review): eval() executes the configuration text as Python, which runs
# arbitrary code if the configuration is not trusted.
# The trailing "example:" values are the ones noted by the original author.
DEFAULT_ENV_NAME = params['DEFAULT_ENV_NAME']
FCOUNTS = eval(params['FCOUNTS'])  # example: 10
BCOUNT = eval(params['BCOUNT'])  # example: -1
TRACK = eval(params['TRACK'])  # example: 5

HIDDEN_SIZE = eval(params['HIDDEN_SIZE'])

env = environ_grid(pdb, DEFAULT_ENV_NAME, RENDER, test, TRACK, FCOUNTS, BCOUNT)

state = env.reset()
total_reward = 0.0
c = collections.Counter()

test_net = DQN(env.obs_size, HIDDEN_SIZE, env.n_actions)
print(test_net)
# The map_location callable returns each storage unchanged, i.e. as it was
# deserialised on the CPU, so a checkpoint saved from a GPU still loads on a
# CPU-only machine.
test_net.load_state_dict(
    torch.load("models/" + DEFAULT_ENV_NAME + "-best.dat",
               map_location=lambda storage, loc: storage))

while True:
Exemplo n.º 4
0
RENDER = 1

test = 1

if RENDER:
    # Delete any temp_grid.npy already on disk before the environment is
    # built. os.remove replaces the original `rm -rf` shell call so this also
    # works where no `rm` binary exists; a missing file is ignored, as before.
    # NOTE(review): unlike `rm -rf`, this does not remove a directory of that
    # name -- temp_grid.npy is presumably always a regular file.
    try:
        os.remove('temp_grid.npy')
    except FileNotFoundError:
        pass

DEFAULT_ENV_NAME = "Protein folding"
device = "cpu"

# Structure file: sys.argv[1] when supplied, otherwise the bundled default.
pdb = sys.argv[1] if len(sys.argv) > 1 else '1k43.pdb'

env = environ_grid(pdb, DEFAULT_ENV_NAME, RENDER, test)

print(env)


class Net(nn.Module):
    """Stack of three fully connected layers with ReLU between them.

    Layer widths run obs_size -> hidden_size -> int(hidden_size / 2)
    -> n_actions; the stack is stored as ``self.net``.
    """

    def __init__(self, obs_size, hidden_size, n_actions):
        super().__init__()
        # Width of the second hidden layer: half the first, truncated to int.
        half_width = int(hidden_size / 2)
        layers = [
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, half_width),
            nn.ReLU(),
            nn.Linear(half_width, n_actions),
        ]
        self.net = nn.Sequential(*layers)