params = read_inp() for i in params: print(i, params[i]) DEFAULT_ENV_NAME = params['DEFAULT_ENV_NAME'] #"1k43" MEAN_REWARD_BOUND = eval(params['MEAN_REWARD_BOUND']) #-3.0 RENDER = eval(params['RENDER']) #0 FCOUNTS = eval(params['FCOUNTS']) #10 BCOUNT = eval(params['BCOUNT']) #-1 TRACK = eval( params['TRACK'] ) #5 # how much residue coordinates be included from generated sequence env = environ_grid('1k43.pdb', DEFAULT_ENV_NAME, RENDER, 0, TRACK, FCOUNTS, BCOUNT) GAMMA = eval(params['GAMMA']) #0.99 BATCH_SIZE = eval(params['BATCH_SIZE']) #32 REPLAY_SIZE = eval(params['REPLAY_SIZE']) #10000 LEARNING_RATE = eval(params['LEARNING_RATE']) #1e-4 SYNC_TARGET_FRAMES = eval(params['SYNC_TARGET_FRAMES']) #1000 REPLAY_START_SIZE = eval(params['REPLAY_START_SIZE']) #10000 EPSILON_DECAY_LAST_FRAME = eval(params['EPSILON_DECAY_LAST_FRAME']) #10**6 EPSILON_START = eval(params['EPSILON_START']) #1.0 EPSILON_FINAL = eval(params['EPSILON_FINAL']) #0.05 MAX_ITER = eval(params['MAX_ITER']) #10**9 Experience = collections.namedtuple(
# we don't want to back-propagate through this calculation # because it is just observations that we want to be true # That is, we want to change the expected values output from # the net, not the observations calculation next_state_values = next_state_values.detach() # calc the Q function behavior we want expected_state_action_values = next_state_values * GAMMA + rewards_v # compare what we have to what we want return nn.MSELoss()(state_action_values, expected_state_action_values) DEFAULT_ENV_NAME = "Protein folding" env = environ_grid('1k43.pdb',DEFAULT_ENV_NAME, 0) print (env) obs_size = env.obs_size n_actions = env.n_actions print(obs_size,n_actions) class Net(nn.Module): def __init__(self, obs_size, hidden_size, n_actions): super(Net, self).__init__() def init_weights(m): if type(m) == nn.Linear: torch.nn.init.xavier_uniform(m.weight) m.bias.data.fill_(0.0)
DEFAULT_ENV_NAME = '1k43' pdb = '1k43.pdb' if len(sys.argv) > 2: pdb = sys.argv[2] RENDER = 1 test = 1 DEFAULT_ENV_NAME = params['DEFAULT_ENV_NAME'] FCOUNTS = eval(params['FCOUNTS']) #10 BCOUNT = eval(params['BCOUNT']) #-1 TRACK = eval(params['TRACK']) #5 HIDDEN_SIZE = eval(params['HIDDEN_SIZE']) env = environ_grid(pdb, DEFAULT_ENV_NAME, RENDER, test, TRACK, FCOUNTS, BCOUNT) state = env.reset() total_reward = 0.0 c = collections.Counter() RENDER = 1 #import matplotlib.pyplot as plt #from mpl_toolkits.mplot3d import Axes3D test_net = DQN(env.obs_size, HIDDEN_SIZE, env.n_actions) print(test_net) test_net.load_state_dict( torch.load("models/" + DEFAULT_ENV_NAME + "-best.dat", map_location=lambda storage, loc: storage)) while True:
RENDER = 1
test = 1

if RENDER:
    # Delete any existing temp_grid.npy before the environment is created.
    # os.remove replaces the former shell call (rm -rf) so the clean-up does
    # not depend on a POSIX shell. Like `rm -f`, a missing file is not an
    # error and other failures are reported without stopping the script.
    try:
        os.remove('temp_grid.npy')
    except FileNotFoundError:
        pass
    except OSError as err:
        print('could not remove temp_grid.npy:', err)

DEFAULT_ENV_NAME = "Protein folding"
device = "cpu"

# The structure file comes from the first command-line argument when present.
if len(sys.argv) > 1:
    pdb = sys.argv[1]
else:
    pdb = '1k43.pdb'

env = environ_grid(pdb, DEFAULT_ENV_NAME, RENDER, test)
print(env)


class Net(nn.Module):
    """Feed-forward stack: obs_size -> hidden_size -> hidden_size/2 -> n_actions.

    A ReLU follows each of the first two linear layers; the last linear layer
    has no activation after it.
    """
    # NOTE(review): the class may continue past the end of this chunk (no
    # forward method is visible here).

    def __init__(self, obs_size, hidden_size, n_actions):
        super(Net, self).__init__()
        # NOTE(review): line breaks were lost in this chunk; the two entries
        # written without spaces around "/" are taken to be the commented-out
        # ones -- confirm.
        self.net = nn.Sequential(
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, int(hidden_size / 2)),
            #nn.ReLU(),
            #nn.Linear(int(hidden_size/2), int(hidden_size/4)),
            nn.ReLU(),
            nn.Linear(int(hidden_size / 2), n_actions))