def query_tab_names():
    """
    Create Tabs from maya-type sTypes
    """
    search_type = 'sthpw/search_object'

    if env.Mode().get == 'standalone':
        filters = [('type', env.Env().get_types_list()),
                   ('namespace', env.Env().get_namespace())]
    else:
        filters = [('type', env.Mode().get),
                   ('namespace', env.Env().get_namespace())]

    assets = server_query(search_type, filters)

    out_tabs = {
        'names': [],
        'codes': [],
        'layouts': [],
        'colors': [],
    }
    if assets:
        for asset in assets:
            asset_get = asset.get
            out_tabs['names'].append(asset_get('title'))
            out_tabs['codes'].append(asset_get('code'))
            out_tabs['layouts'].append(asset_get('layout'))
            out_tabs['colors'].append(asset_get('color'))

    return out_tabs
def generate_skey(pipeline_code=None, code=None):
    skey = 'skey://{0}/{1}?project={2}&code={3}'.format(
        env.Env().get_namespace(),
        pipeline_code,
        env.Env().get_project(),
        code)
    return skey
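# Illustrative only: the namespace, project and codes below are hypothetical
# placeholders, not values from a real project. With a namespace of 'vfx' and a
# project 'the_pirate', generate_skey() would return a URI of this shape:
#
#   generate_skey('characters', 'CHARACTERS00001')
#   # -> 'skey://vfx/characters?project=the_pirate&code=CHARACTERS00001'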
def slide_images(self, value):
    image_path_icon = '{0}/{1}/{2}'.format(
        env.Env().get_asset_dir(),
        self.icon_list[value - 1]['relative_dir'],
        self.icon_list[value - 1]['file_name'])
    if not self.playblast:
        image_path_big = '{0}/{1}/{2}'.format(
            env.Env().get_asset_dir(),
            self.main_list[value - 1]['relative_dir'],
            self.main_list[value - 1]['file_name'])
    self.preview_image = QtGui.QImage(0, 0, QtGui.QImage.Format_ARGB32)
    if not self.external:
        self.preview_image.load(image_path_big)
    else:
        self.preview_image.load(image_path_icon)
    self.preview_pixmap = QtGui.QPixmap.fromImage(self.preview_image).scaled(
        self.size(),
        QtCore.Qt.KeepAspectRatio,
        QtCore.Qt.SmoothTransformation)
    self.scene = QtGui.QGraphicsScene(self)
    self.scene.addPixmap(self.preview_pixmap)
    self.previewGraphicsView.setScene(self.scene)
    self.previewGraphicsView.fitInView(self.scene.sceneRect(),
                                       QtCore.Qt.KeepAspectRatio)
    if self.playblast:
        return image_path_icon
    else:
        return image_path_big
def server_auth(host, project, login, password):
    tactic_srv = tactic_client_lib.TacticServerStub.get(setup=False)
    srv = host
    prj = project
    tactic_srv.set_server(srv)
    tactic_srv.set_project(prj)
    log = login
    psw = password
    ticket = env.Env().get_ticket()
    if not ticket:
        ticket = tactic_srv.get_ticket(log, psw)
        env.Env().set_ticket(ticket)
    tactic_srv.set_ticket(ticket)
    return tactic_srv
def context_query(process):
    """
    Query for Context elements.
    Builds one list of lists, to reduce the number of queries to the server.
    :param process: list of tab names (vfx/asset)
    """
    search_type = 'sthpw/pipeline'
    filters = [('search_type', process),
               ('project_code', env.Env().get_project())]
    assets = server_query(search_type, filters)

    if assets:
        # TODO: may be worth simplifying this
        contexts = collections.OrderedDict()
        for proc in process:
            contexts[proc] = []
        items = contexts.copy()
        for context in contexts:
            for asset in assets:
                if context == asset['search_type']:
                    contexts[context] = Et.fromstring(asset['pipeline'].encode('utf-8'))
        for key, val in contexts.iteritems():
            if len(val):
                for element in val.iter('process'):
                    items[key].append(element.attrib['name'])
        return items
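# A minimal usage sketch (assumptions: server_query() is reachable and the project
# defines pipelines for the requested sTypes; the sType and process names below are
# hypothetical placeholders):
#
#   contexts = context_query(['vfx/asset', 'vfx/shot'])
#   # contexts -> OrderedDict([('vfx/asset', ['model', 'rig', ...]),
#   #                          ('vfx/shot', ['layout', 'anim', ...])])
#
# i.e. a single pipeline query returns the process names for every requested tab at once.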
def test(actor):
    env = environment.Env()
    state_size = env.state_size
    action_size = env.action_size
    env.reset()
    # env.R.body.wb = np.array([[1.], [-1.], [1.]])
    # env.R.body.vs = np.array([[0.], [2.], [1.]])
    for t in range(int(endtime / env.R.dtime)):
        action = actor(np.reshape(env.state, [1, env.state_size]), batch_size=1)
        # np.zeros((12), dtype=np.float64)
        next_state, reward, done = env.step(action)
        points = np.concatenate([
            np.reshape(env.R.body.Rnow, (4, 3)),
            np.reshape(env.R.joints, (12, 3))
        ], axis=0)
        if t == 0:
            plt.show()
        if done == 1:
            print(done)
            break
        plot_robot(points)
        plt.pause(0.01)
        ax.clear()
def theta2plot(theta):
    # theta: np.array([timeN, 12])
    env = environment.Env()
    state_size = env.state_size
    action_size = env.action_size
    before_theta = np.zeros(12)
    for p in range(env.R.numLeg):
        for i in range(env.R.numsubleg):
            # fixed: the original wrote to an undefined name 'before'
            before_theta[p * env.R.numsubleg + i] = env.R.leg[p].sub[i].theta
    # fixed: np.array([rows, cols]) only builds a 2-element vector; allocate the array instead
    omegas = np.zeros([theta.shape[0] - 1, theta.shape[1]])
    for t in range(theta.shape[0] - 1):
        omegas[t] = (theta[t + 1] - theta[t]) / env.R.dtime
    env.reset_theta(theta[0])
    # env.R.body.wb = np.array([[1.], [-1.], [1.]])
    # env.R.body.vs = np.array([[0.], [2.], [1.]])
    for t in range(int(endtime / env.R.dtime)):
        print('t = ', t)
        # np.zeros((12), dtype=np.float64)
        next_state, reward, done = env.step(omegas[t])
        points = np.concatenate([
            np.reshape(env.R.body.Rnow, (4, 3)),
            np.reshape(env.R.joints, (12, 3))
        ], axis=0)
        if t == 0:
            plt.show()
        if done == 1:
            print(done)
            break
        plot_robot(points)
        plt.pause(0.01)
        ax.clear()
def readSettings(self):
    """
    Reading Settings
    """
    self.settings.beginGroup(env.Mode().get + '/ui_checkout')
    tab_name = self.objectName().split('/')
    group_path = '{0}/{1}/{2}'.format(tab_name[0],
                                      env.Env().get_project(),
                                      tab_name[1])
    self.settings.beginGroup(group_path)
    self.commentsSplitter.restoreState(self.settings.value('commentsSplitter'))
    self.descriptionSplitter.restoreState(self.settings.value('descriptionSplitter'))
    self.imagesSplitter.restoreState(self.settings.value('imagesSplitter'))
    self.searchLineEdit.setText(self.settings.value('searchLineEdit_text', ''))
    self.contextComboBox.setCurrentIndex(self.settings.value('contextComboBox', 0))
    self.add_items_to_results(self.searchLineEdit.text())
    try:
        gf.revert_expanded_state(
            self.resultsTreeWidget,
            self.settings.value('resultsTreeWidget_isExpanded', None),
            expand=True)
        gf.revert_expanded_state(
            self.resultsTreeWidget,
            self.settings.value('resultsTreeWidget_isSelected', None),
            select=True)
    except:
        pass
    self.settings.endGroup()
    self.settings.endGroup()
def fill_notes(self):
    self.conversationScrollArea.close()
    self.create_scroll_area()
    self.current_user = env.Env().get_user()
    self.task_item.get_notes()
    self.widgets_list = []
    for proc in self.task_item.notes.itervalues():
        for context in proc.contexts.itervalues():
            for note in reversed(list(context.items.itervalues())):
                if note.info['process'] == self.task_item.info['process']:
                    if note.info['login'] == self.current_user:
                        self.note_widget = Ui_outcomWidget(note, self)
                        self.lay.addWidget(self.note_widget)
                        self.widgets_list.append(self.note_widget)
                    else:
                        self.note_widget = Ui_incomWidget(note, self)
                        self.lay.addWidget(self.note_widget)
                        self.widgets_list.append(self.note_widget)
    # looks like duct tape
    self.conversationScrollArea.show()
    # TODO make scroll_to_widget using line above
    # print(self.widgets_list[-1].height())
    self.conversationScrollArea.verticalScrollBar().setValue(
        self.conversationScrollArea.verticalScrollBar().maximum())
def perform_save(self):
    """
    Scope all Edits for save
    :return:
    """
    if self.projectInfoCodeLabel.text():
        env.Env().set_project(self.projectInfoCodeLabel.text())
    self.restart()
def __init__(self, parent=None):
    super(self.__class__, self).__init__(parent=parent)
    self.settings = QtCore.QSettings('TACTIC Handler', 'TACTIC Handling Tool')
    self.tactic_project = env.Env().get_project()
    self.setupUi(self)
    self.readSettings()
    self.tab_actions()
def play_games(self, agent: agent.AbstractAgent, buffer: replay_buffer.DataContainer):
    for _ in range(self.config.game_count_per_iteration):
        t = 0
        env = environment.Env()
        T = replay_buffer.Trajectory()
        while not env.terminate():
            state = env.get_state()
            action = agent.act(state, t % 2)
            env.step(action)
            env.render()
            T.add(state, action)
            t += 1
        T.result = env.result()
        buffer.save_game(T)
def query_notes(s_code, process=None):
    """
    Query for Notes
    :param s_code: Code of asset related to note
    :param process: Process code
    :return:
    """
    search_type = 'sthpw/note'
    if process:
        filters = [('search_code', s_code),
                   ('process', process),
                   ('project_code', env.Env().get_project())]
    else:
        filters = [('search_code', s_code),
                   ('project_code', env.Env().get_project())]
    return server_query(search_type, filters)
def query_snapshots(process_list=None, s_code=None):
    """
    Query for snapshots belonging to an asset
    :return: list of snapshots
    """
    process_codes = list(process_list)
    process_codes.extend(['icon', 'attachment', 'publish'])
    filters_snapshots = [
        ('process', process_codes),
        ('project_code', env.Env().get_project()),
        ('search_code', s_code),
    ]
    return server_start().query_snapshots(filters=filters_snapshots, include_files=True)
def query_snapshots(s_code, process=None, user=None):
    """
    Query for Snapshots
    :param s_code: Code of asset related to snapshot
    :param process: Process code
    :param user: Optional user names
    :return:
    """
    # TODO per-user query
    if process:
        filters = [('search_code', s_code),
                   ('process', process),
                   ('project_code', env.Env().get_project())]
    else:
        filters = [('search_code', s_code),
                   ('project_code', env.Env().get_project())]
    return server_start().query_snapshots(filters=filters, include_files=True)
def query_tasks(s_code, process=None, user=None):
    """
    Query for Tasks
    :param s_code: Code of asset related to task
    :param process: Process code
    :param user: Optional user names
    :return:
    """
    # TODO per-user query
    search_type = 'sthpw/task'
    if process:
        filters = [('search_code', s_code),
                   ('process', process),
                   ('project_code', env.Env().get_project())]
    else:
        filters = [('search_code', s_code),
                   ('project_code', env.Env().get_project())]
    return server_query(search_type, filters)
def enjoyPrius(args):
    prius = simulator.Prius(args)
    env = En.Env(args.road, args.vehicle, args.track)
    while True:
        print(env._terminal(prius.collisions()))
        # env.testRender(pos)
        # time.sleep(0.5)
        # for i in range(0, 90):
        #     pos.position.x = 42 + i/30
        #     pos.position.y = i / 30
        #     pos.orientation.x = i/90 * math.pi/2
        #     print str(pos)
        #     print(env.testRender(pos))
        #     # env.testRender(pos)
        #     time.sleep(0.05)
        # time.sleep(0.5)
        # while True:
        #     if args.mode == 0:
        #         prius.control_prius(args.value, -prius.pose().position.y/80, 0)
        #     if args.mode == 1:
        #         prius.control_world()
        #     break
        #     time.sleep(0.1)
        # while True:
        # prius.control_world(False)
        # time.sleep(5)
        # print str(prius.pose())
        # if args.mode == 0:
        #     # prius.control_prius(args.value, -prius.pose().position.y/80, 0)
        #     # prius.control_prius(args.value, -prius.pose().position.y/20, 0)
        #     prius.control_prius(args.value, 0.2, 0)
        # if args.mode == 1:
        #     prius.control_world()
        #     break
        # print prius.pose().orientation.x
        # print str(prius.collisions())
        # print prius.collisions().position.x
        # print str(prius.velocity())
        print '===================='
        pass
def writeSettings(self):
    """
    Writing Settings
    """
    self.settings.beginGroup(env.Mode().get + '/ui_checkin')
    tab_name = self.objectName().split('/')
    group_path = '{0}/{1}/{2}'.format(tab_name[0],
                                      env.Env().get_project(),
                                      tab_name[1])
    self.settings.beginGroup(group_path)
    self.settings.setValue('commentsSplitter', self.commentsSplitter.saveState())
    self.settings.setValue('descriptionSplitter', self.descriptionSplitter.saveState())
    self.settings.setValue('imagesSplitter', self.imagesSplitter.saveState())
    self.settings.setValue('dropPlateSplitter', self.dropPlateSplitter.saveState())
    self.settings.setValue('searchOptionsSplitter', self.searchOptionsSplitter.saveState())
    self.settings.setValue('searchLineEdit_text', self.searchLineEdit.text())
    self.settings.setValue('contextComboBox', self.contextComboBox.currentIndex())
    self.settings.setValue(
        'searchByCodeRadioButton',
        self.searchOptionsGroupBox.byCodeRadioButton.isChecked())
    self.settings.setValue(
        'searchByNameRadioButton',
        self.searchOptionsGroupBox.byNameRadioButton.isChecked())
    self.settings.setValue(
        'searchAllProcessCheckBox',
        self.searchOptionsGroupBox.showAllProcessCheckBox.isChecked())
    if self.resultsTreeWidget.topLevelItemCount() > 0:
        self.settings.setValue(
            'resultsTreeWidget_isSelected',
            gf.expanded_state(self.resultsTreeWidget, is_selected=True))
        self.settings.setValue(
            'resultsTreeWidget_isExpanded',
            gf.expanded_state(self.resultsTreeWidget, is_expanded=True))
    print('Done ui_checkin_tree ' + self.objectName() + ' settings write')
    self.settings.endGroup()
    self.settings.endGroup()
def readSettings(self):
    """
    Reading Settings
    """
    self.userNameLineEdit.setText(env.Env().get_user())
    # fixed: the original filled the password field with get_user(); get_pass()
    # (used elsewhere, e.g. in get_server()) is assumed to be the intended getter
    self.passwordLineEdit.setText(env.Env().get_pass())
    self.tacticEnvLineEdit.setText(env.Env().get_data_dir())
    self.tacticAssetDirLineEdit.setText(env.Env().get_asset_dir())
    self.tacticInstallDirLineEdit.setText(env.Env().get_install_dir())
    self.tacticServerLineEdit.setText(env.Env().get_server())
    if env.Mode().get == 'maya':
        self.currentWorkdirLineEdit.setText(cmds.workspace(q=True, dir=True))
    self.settings.beginGroup(env.Mode().get + '/ui_conf')
    self.configToolBox.setCurrentIndex(self.settings.value('configToolBox', 0))
    self.settings.endGroup()
def get_current_item_paths(self):
    nested_item = self.resultsTreeWidget.itemWidget(
        self.resultsTreeWidget.currentItem(), 0)
    file_path = None
    dir_path = None
    all_process = None
    modes = env.Mode().mods
    modes.append('main')
    for mode in modes:
        if nested_item.files.get(mode):
            main_file = nested_item.files[mode][0]
            asset_dir = env.Env().get_asset_dir()
            file_path = '{0}/{1}/{2}'.format(asset_dir,
                                             main_file['relative_dir'],
                                             main_file['file_name'])
            split_path = main_file['relative_dir'].split('/')
            dir_path = '{0}/{1}'.format(
                asset_dir, '{0}/{1}/{2}'.format(*split_path))
            all_process = nested_item.sobject.all_process
    return file_path, dir_path, all_process
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------

    pg_learners = []
    envs = []

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    # create a sequence of environments, one for each of the num_ex job sets/sequences
    for ex in xrange(pa.num_ex):
        print "-prepare for env-", ex
        env = environment.Env(pa, nw_len_seqs=nw_len_seqs, nw_size_seqs=nw_size_seqs,
                              render=False, repre=repre, end=end)
        env.seq_no = ex
        envs.append(env)

    # generate a sequence of NNs for each batch, each of which is a policy gradient agent
    for ex in xrange(pa.batch_size + 1):  # last worker for updating the parameters
        print "-prepare for worker-", ex
        pg_learner = pg_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = cPickle.load(net_handle)
            pg_learner.set_net_params(net_params)

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    print("Preparing for reference data...")
    # --------------------------------------

    ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False,
                                                            plot=False, repre=repre, end=end)
    mean_rew_lr_curve = []
    max_rew_lr_curve = []
    slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in xrange(1, pa.num_epochs):

        # use a process for each job set; use a manager to share results across processes
        ps = []  # worker processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = range(pa.num_ex)
        np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0

        # for each job set
        for ex in xrange(pa.num_ex):

            ex_idx = ex_indices[ex]

            # evaluate several instances of trajectories for the set of PG agents
            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print ex, "out of", pa.num_ex

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples(
                    [r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # Do policy gradient update step, using the first agent;
                # put the new parameters in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])

                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths

                all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))
                all_entropy.extend(np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in xrange(1, len(grads_all)):
            for j in xrange(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to others
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

        for i in xrange(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print "-----------------"
        print "Iteration: \t %i" % iteration
        print "NumTrajs: \t %i" % len(eprews)
        print "NumTimesteps: \t %i" % np.sum(eplens)
        # print "Loss: \t %s" % np.mean(loss_all)
        print "MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews])
        print "MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews))
        print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print "MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens))
        print "MeanEntropy \t %s" % (np.mean(all_entropy))
        print "Elapsed time\t %s" % (timer_end - timer_start), "seconds"
        print "-----------------"

        timer_start = time.time()

        max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        mean_rew_lr_curve.append(np.mean(eprews))
        slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            param_file = open(pa.output_filename + '_' + str(iteration) + '.pkl', 'wb')
            cPickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()

            pa.unseen = True
            slow_down_cdf.launch(pa, pa.output_filename + '_' + str(iteration) + '.pkl',
                                 render=False, plot=True, repre=repre, end=end)
            pa.unseen = False  # test on unseen examples

            plot_lr_curve(pa.output_filename,
                          max_rew_lr_curve, mean_rew_lr_curve, slow_down_lr_curve,
                          ref_discount_rews, ref_slow_down)
def launch(pa, pg_resume=None, render=False, plot=False, repre='image', end='no_new_job'):

    # ---- Parameters ----

    test_types = ['Tetris', 'SJF', 'Random']

    if pg_resume is not None:
        test_types = ['PG'] + test_types

    env = environment.Env(pa, render, repre=repre, end=end)

    all_discount_rews = {}
    jobs_slow_down = {}
    work_complete = {}
    work_remain = {}
    job_len_remain = {}
    num_job_remain = {}
    job_remain_delay = {}

    for test_type in test_types:
        all_discount_rews[test_type] = []
        jobs_slow_down[test_type] = []
        work_complete[test_type] = []
        work_remain[test_type] = []
        job_len_remain[test_type] = []
        num_job_remain[test_type] = []
        job_remain_delay[test_type] = []

    for seq_idx in xrange(pa.num_ex):
        print('\n\n')
        print("=============== " + str(seq_idx) + " ===============")

        for test_type in test_types:

            rews, info = get_traj(test_type, pa, env, pa.episode_max_length, pg_resume)

            print "---------- " + test_type + " -----------"
            print "total discount reward : \t %s" % (discount(rews, pa.discount)[0])

            all_discount_rews[test_type].append(
                discount(rews, pa.discount)[0]
            )

            # ------------------------
            # ---- per-job stats ----
            # ------------------------

            enter_time = np.array([info.record[i].enter_time for i in xrange(len(info.record))])
            finish_time = np.array([info.record[i].finish_time for i in xrange(len(info.record))])
            job_len = np.array([info.record[i].len for i in xrange(len(info.record))])
            job_total_size = np.array([np.sum(info.record[i].res_vec) for i in xrange(len(info.record))])

            finished_idx = (finish_time >= 0)
            unfinished_idx = (finish_time < 0)

            jobs_slow_down[test_type].append(
                (finish_time[finished_idx] - enter_time[finished_idx]) / job_len[finished_idx]
            )
            work_complete[test_type].append(
                np.sum(job_len[finished_idx] * job_total_size[finished_idx])
            )
            work_remain[test_type].append(
                np.sum(job_len[unfinished_idx] * job_total_size[unfinished_idx])
            )
            job_len_remain[test_type].append(
                np.sum(job_len[unfinished_idx])
            )
            num_job_remain[test_type].append(
                len(job_len[unfinished_idx])
            )
            job_remain_delay[test_type].append(
                np.sum(pa.episode_max_length - enter_time[unfinished_idx])
            )

        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    # -- matplotlib colormap, no overlap --
    if plot:
        num_colors = len(test_types)
        cm = plt.get_cmap('gist_rainbow')

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_color_cycle([cm(1. * i / num_colors) for i in range(num_colors)])

        for test_type in test_types:
            slow_down_cdf = np.sort(np.concatenate(jobs_slow_down[test_type]))
            slow_down_yvals = np.arange(len(slow_down_cdf)) / float(len(slow_down_cdf))
            ax.plot(slow_down_cdf, slow_down_yvals, linewidth=2, label=test_type)

        plt.legend(loc=4)
        plt.xlabel("job slowdown", fontsize=20)
        plt.ylabel("CDF", fontsize=20)
        # plt.show()
        plt.savefig(pg_resume + "_slowdown_fig" + ".pdf")

    return all_discount_rews, jobs_slow_down
import environment
import time

height, width = 30, 30
vehNum = 3
CrossRoad = environment.Env(vehNum, height, width, 4)
CrossRoad.showEnv_init()

for count in range(1):
    collisionFlag = False
    endFlag = False
    tag = 0
    CrossRoad.reStart()
    print()
    while not (collisionFlag or endFlag):
        action = [0] * vehNum
        [state, reward, collisionFlag, endFlag] = CrossRoad.updateEnv(action)
        CrossRoad.showEnv()
        tag += 1
        print(count, "step: ", tag, "collision?: ", collisionFlag, "end?: ", endFlag)
        print(state)
        time.sleep(2)
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):

    env = environment.Env(pa, render=False, repre=repre, end=end)

    pg_learner = pg_network.PGLearner(pa)

    if pg_resume is not None:
        net_handle = open(pg_resume, 'r')
        net_params = cPickle.load(net_handle)
        pg_learner.set_net_params(net_params)

    if pa.evaluate_policy_name == "SJF":
        evaluate_policy = other_agents.get_sjf_action
    elif pa.evaluate_policy_name == "PACKER":
        evaluate_policy = other_agents.get_packer_action
    else:
        print("Panic: no policy known to evaluate.")
        exit(1)

    # ----------------------------
    print("Preparing for data...")
    # ----------------------------

    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(pa, seed=42)

    # print 'nw_time_seqs=', nw_len_seqs
    # print 'nw_size_seqs=', nw_size_seqs

    mem_alloc = 4

    X = np.zeros([pa.simu_len * pa.num_ex * mem_alloc, 1,
                  pa.network_input_height, pa.network_input_width],
                 dtype=theano.config.floatX)
    y = np.zeros(pa.simu_len * pa.num_ex * mem_alloc, dtype='int32')

    print 'network_input_height=', pa.network_input_height
    print 'network_input_width=', pa.network_input_width

    counter = 0

    for train_ex in range(pa.num_ex):

        env.reset()

        for _ in xrange(pa.episode_max_length):

            # ---- get current state ----
            ob = env.observe()

            a = evaluate_policy(env.machine, env.job_slot)

            if counter < pa.simu_len * pa.num_ex * mem_alloc:
                add_sample(X, y, counter, ob, a)
                counter += 1

            ob, rew, done, info = env.step(a, repeat=True)

            if done:  # hit void action, exit
                break

        # roll to next example
        env.seq_no = (env.seq_no + 1) % env.pa.num_ex

    num_train = int(0.8 * counter)
    num_test = int(0.2 * counter)

    X_train, X_test = X[:num_train], X[num_train:num_train + num_test]
    y_train, y_test = y[:num_train], y[num_train:num_train + num_test]

    # Normalization, make sure nothing becomes NaN
    # X_mean = np.average(X[:num_train + num_test], axis=0)
    # X_std = np.std(X[:num_train + num_test], axis=0)
    #
    # X_train = (X_train - X_mean) / X_std
    # X_test = (X_test - X_mean) / X_std

    # ----------------------------
    print("Start training...")
    # ----------------------------

    for epoch in xrange(pa.num_epochs):

        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, pa.batch_size, shuffle=True):
            inputs, targets = batch
            err, prob_act = pg_learner.su_train(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            train_err += err
            train_acc += np.sum(pg_act == targets)
            train_batches += 1

        # And a full pass over the test data:
        test_err = 0
        test_acc = 0
        test_batches = 0
        for batch in iterate_minibatches(X_test, y_test, pa.batch_size, shuffle=False):
            inputs, targets = batch
            err, prob_act = pg_learner.su_test(inputs, targets)
            pg_act = np.argmax(prob_act, axis=1)
            test_err += err
            test_acc += np.sum(pg_act == targets)
            test_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, pa.num_epochs, time.time() - start_time))
        print("  training loss:    \t\t{:.6f}".format(train_err / train_batches))
        print("  training accuracy:\t\t{:.2f} %".format(train_acc / float(num_train) * 100))
        print("  test loss:        \t\t{:.6f}".format(test_err / test_batches))
        print("  test accuracy:    \t\t{:.2f} %".format(test_acc / float(num_test) * 100))

        sys.stdout.flush()

        if epoch % pa.output_freq == 0:
            net_file = open(pa.output_filename + '_net_file_' + str(epoch) + '.pkl', 'wb')
            cPickle.dump(pg_learner.return_net_params(), net_file, -1)
            net_file.close()

    print("done")
def enjoyPrius(args):
    # controlling loop
    global going_on
    signal.signal(signal.SIGINT, handler)
    going_on = True

    # prius and its environment
    prius = simulator.Prius(args)
    thread_update_pos = threading.Thread(target=updatePriusPos, args=(prius,))
    thread_update_collisions = threading.Thread(target=updatePriusCollision, args=(prius,))
    thread_update_pos.start()
    thread_update_collisions.start()
    env = En.Env(args.road, args.vehicle, args.track)

    # network session and its parameters
    sess = tf.InteractiveSession()
    inputState, outputQ, h_fc1 = createNetwork()
    # action is chosen by policy pi
    action = tf.placeholder("float", [None, ACTIONS])
    # optimal q value of the actions taken just now
    target_q = tf.placeholder("float", [None])
    # real q value when these actions are selected and actuated
    action_q = tf.reduce_sum(tf.multiply(outputQ, action), reduction_indices=1)
    # target is q -> *q
    cost = tf.reduce_mean(tf.square(target_q - action_q))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    while terminal:
        prius.reset()
        time.sleep(0.2)
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())

    # control prius by pedal percent 0.2, hand steering is 0, brake pedal is 0
    prius.control_prius(0.2, 0, 0)
    x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
    x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    state_t = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)
    state_t1 = state_t

    epsilon = INITIAL_EPSILON
    step = 1
    store = deque()
    simTime = time.time()

    while going_on:
        # always run at high frequency
        x_t, reward, terminal = env.render(prius.collisions(), prius.pose())

        # train once past the observation stage
        if step > OBSERVE:
            # sample a minibatch to train
            minibatch = random.sample(store, BATCH)

            # get the batch variables
            state_j_batch = [d[0] for d in minibatch]
            action_batch = [d[1] for d in minibatch]
            reward_batch = [d[2] for d in minibatch]
            state_j1_batch = [d[3] for d in minibatch]

            # qV_batch = []
            target_q_batch = []
            q_action1_batch = outputQ.eval(feed_dict={inputState: state_j1_batch})
            for i in range(0, len(minibatch)):
                terminal_j = minibatch[i][4]
                if terminal_j:
                    target_q_batch.append(reward_batch[i])
                else:
                    target_q_batch.append(reward_batch[i] + GAMMA * (
                        np.max(q_action1_batch[i][0:2]) +
                        np.max(q_action1_batch[i][2:4]) +
                        np.max(q_action1_batch[i][4:6]) +
                        np.max(q_action1_batch[i][6:8]) +
                        np.max(q_action1_batch[i][8:10]) +
                        np.max(q_action1_batch[i][10:12]) +
                        np.max(q_action1_batch[i][12:14]) +
                        np.max(q_action1_batch[i][14:16]) +
                        np.max(q_action1_batch[i][16:18]) +
                        np.max(q_action1_batch[i][18:20])))

            train_step.run(feed_dict={
                target_q: target_q_batch,
                action: action_batch,
                inputState: state_j_batch})

            # save progress every 10000 iterations
            if step % 10000 == 0:
                saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step=step)
        # step > OBSERVE: end of train_step

        # control at a frequency of 10 Hz
        if (time.time() - simTime < 0.098) and (not terminal):
            continue
        simTime = time.time()

        # current q value
        # readout_t = readout.eval(feed_dict={s: [s_t]})[0]
        qV_t = outputQ.eval(feed_dict={inputState: [state_t]})[0]

        # epsilon-greedy policy
        action_array_t = np.zeros([ACTIONS])
        action_angle_t = 0
        # scale down epsilon
        if epsilon > FINAL_EPSILON and step > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
        if random.random() < epsilon:
            action_array_t, action_angle_t = getRandomAction()
        else:
            action_array_t, action_angle_t = getAction(qV_t)
        prius.control_prius(0.2, -1 * action_angle_t, 0)

        x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        x_t = np.reshape(x_t, (160, 160, 1))
        state_t1 = np.append(x_t, state_t[:, :, :9], axis=2)

        store.append((state_t, action_array_t, reward, state_t1, terminal))
        if len(store) > REPLAY_MEMORY:
            store.popleft()

        # print on-time reward
        line1 = "step :====================== ", step, "======================== "
        line3 = "reward ", reward, " "
        if terminal:
            printRed(line1)
            printRed(line3)
        elif reward < 0.04:
            printYellow(line1)
            printYellow(line3)
        elif reward < 0.1:
            printCyan(line1)
            printCyan(line3)
        else:
            printGreen(line1)
            printGreen(line3)

        # if terminal, restart
        if terminal:
            prius.reset()
            time.sleep(0.2)
            x_t, reward, terminal = env.render(prius.collisions(), prius.pose())
            x_t = cv2.cvtColor(cv2.resize(x_t, (160, 160)), cv2.COLOR_BGR2GRAY)
            ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
            state_t1 = np.stack((x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t, x_t), axis=2)

        state_t = state_t1
        step += 1
def save_scene(search_key, context, description, all_process):
    # add info about particular scene
    skey_link = 'skey://{0}&context={1}'.format(search_key, context)
    if not cmds.attributeQuery('tacticHandler_skey', node='defaultObjectSet', exists=True):
        cmds.addAttr('defaultObjectSet', longName='tacticHandler_skey', dataType='string')
    cmds.setAttr('defaultObjectSet.tacticHandler_skey', skey_link, type='string')

    # get template names for scene and playblast image
    temp_dir = env.Env().get_temp_dir()
    random_uuid = uuid.uuid4()
    types = {
        'mayaBinary': 'mb',
        'mayaAscii': 'ma',
    }
    temp_file = '{0}/{1}.ma'.format(temp_dir, random_uuid)
    temp_playblast = '{0}/{1}.jpg'.format(temp_dir, random_uuid)

    # rename file, save scene, playblast, get saving format
    cmds.file(rename=temp_file)
    cmds.file(save=True, type='mayaAscii')
    current_frame = cmds.currentTime(query=True)
    cmds.playblast(forceOverwrite=True,
                   format='image',
                   completeFilename=temp_playblast,
                   showOrnaments=False,
                   widthHeight=[960, 540],
                   sequenceTime=False,
                   frame=[current_frame],
                   compression='jpg',
                   offScreen=True,
                   viewer=False,
                   percent=100)

    # check in snapshot
    snapshot = tc.checkin_snapshot(search_key,
                                   context,
                                   temp_file,
                                   file_type='maya',
                                   is_current=True,
                                   description=description)
    # from pprint import pprint
    # pprint(snapshot)

    # retrieve checked-in snapshot file info
    asset_dir = env.Env().get_asset_dir()
    file_sobject = snapshot['__file_sobjects__'][0]
    relative_dir = file_sobject['relative_dir']
    file_name = file_sobject['file_name']

    # make proper file path, and dir path to set workspace
    new_file = '{0}/{1}/{2}'.format(asset_dir, relative_dir, file_name)
    split_path = relative_dir.split('/')
    dir_path = '{0}/{1}'.format(asset_dir, '{0}/{1}/{2}'.format(*split_path))
    set_workspace(dir_path, all_process)

    # check in playblast
    tc.checkin_playblast(snapshot['code'], temp_playblast)

    # set proper scene name
    cmds.file(rename=new_file)
def __init__(
        self,
        # ddqn parameters
        connection_label="lonely_worker",
        q_network_type='simple',
        target_q_network_type='simple',
        gamma=0.99,
        target_update_freq=10000,
        train_freq=3,
        num_burn_in=300,
        batch_size=32,
        optimizer='adam',
        loss_func="mse",
        max_ep_length=1000,
        experiment_id="Exp_1",
        model_checkpoint=True,
        opt_metric=None,
        # environment parameters
        net_file="cross.net.xml",
        route_file="cross.rou.xml",
        network_dir="./network",
        demand="nominal",
        state_shape=(1, 11),
        num_actions=2,
        use_gui=False,
        delta_time=10,
        reward="balanced",
        # memory parameters
        max_size=100000,
        # additional parameters
        policy="linDecEpsGreedy",
        eps=0.1,
        num_episodes=2,
        monitoring=False,
        episode_recording=False,
        hparams=None):

    if hparams:
        args_description = locals()
        args_description = str({key: args_description[key] for key in hparams})
    else:
        args_description = "single_worker"

    self.connection_label = connection_label
    self.q_network_type = q_network_type
    self.target_q_network_type = target_q_network_type
    self.gamma = gamma
    self.target_update_freq = target_update_freq
    self.train_freq = train_freq
    self.num_burn_in = num_burn_in
    self.batch_size = batch_size
    self.optimizer = optimizer
    self.loss_func = loss_func
    self.max_ep_length = max_ep_length
    self.experiment_id = experiment_id
    self.model_checkpoint = model_checkpoint
    self.opt_metric = opt_metric

    # additional parameters
    self.policy = policy
    self.eps = eps
    self.num_episodes = num_episodes
    self.monitoring = monitoring
    self.episode_recording = episode_recording
    self.output_dir, self.summary_writer_folder = tools.get_output_folder(
        "./logs", self.experiment_id, args_description)
    self.summary_writer = tf.summary.FileWriter(logdir=self.summary_writer_folder)

    # environment parameters
    self.net_file = os.path.join(network_dir, net_file)
    self.route_file = os.path.join(self.output_dir, route_file)
    self.demand = demand
    self.state_shape = state_shape
    self.num_actions = num_actions
    self.use_gui = use_gui
    self.delta_time = delta_time
    self.reward = reward

    # memory parameters
    self.max_size = max_size
    self.state_shape = state_shape

    # Initialize Q-networks (value and target)
    self.q_network = agent.get_model(model_name=self.q_network_type,
                                     input_shape=(self.state_shape[1], ),
                                     num_actions=self.num_actions)
    self.target_q_network = agent.get_model(model_name=self.target_q_network_type,
                                            input_shape=(self.state_shape[1], ),
                                            num_actions=self.num_actions)

    # Initialize environment
    self.env = environment.Env(connection_label=self.connection_label,
                               net_file=self.net_file,
                               route_file=self.route_file,
                               demand=self.demand,
                               state_shape=self.state_shape,
                               num_actions=self.num_actions,
                               policy=self.policy,
                               use_gui=self.use_gui,
                               eps=self.eps,
                               reward=self.reward)

    # Initialize replay memory
    self.memory = memory.ReplayMemory(max_size=self.max_size,
                                      state_shape=self.state_shape,
                                      num_actions=self.num_actions)

    # Initialize Double DQN algorithm
    self.ddqn = doubledqn.DoubleDQN(
        q_network=self.q_network,
        target_q_network=self.target_q_network,
        memory=self.memory,
        gamma=self.gamma,
        target_update_freq=self.target_update_freq,
        train_freq=self.train_freq,
        num_burn_in=self.num_burn_in,
        batch_size=self.batch_size,
        optimizer=self.optimizer,
        loss_func=self.loss_func,
        max_ep_length=self.max_ep_length,
        env_name=self.env,
        output_dir=self.output_dir,
        monitoring=self.monitoring,
        episode_recording=self.episode_recording,
        experiment_id=self.experiment_id,
        summary_writer=self.summary_writer)

    # Store initialization parameters
    self.store_init(locals())
def train(supply_distribution: Tuple[dict, list],
          demand_distribution: Tuple[dict, list],
          model_name: str,
          demand: int,
          max_day: int,
          training_timesteps_list: list,
          tblog: str,
          max_age: int = 35,
          obs_method: int = 1,
          doi: int = 4) -> str:
    """
    Train the agent.
    First train without evaluation, then train with in-training evaluation.
    :param demand_distribution: dict of {blood group: prevalence}, list of antigens included in the demand
    :param supply_distribution: dict of {blood group: prevalence}, list of antigens included in the supply
    :param model_name: str, name of the model to be stored
    :param demand: int, number of blood units that are supplied / requested
    :param max_day: int, number of days per episode
    :param training_timesteps_list: list, [number of episodes without evaluation, number of episodes with evaluation]
    :param tblog: str, name of the tensorboard log
    :param max_age: int, max age of the RBCs
    :param obs_method: int, 1 or 2: item requested one-hot-encoded (1) or binary (2)
    :param doi: int, number of days of inventory
    :return: file name: str, name of the model that is stored
    """
    # Initialize parameters
    GAMMA = round(1 - (1 / (35 * demand)), 5)  # 0.993
    state_type = 'custom_category'
    time_string = datetime.now().strftime("%Y_%m_%d_%H_%M")
    file_name = time_string + model_name
    max_reward = max_day * demand * 0.1

    # Create environment
    env = environment.Env(supply_distribution[0],
                          demand_distribution[0],
                          max_age,
                          demand,
                          doi=doi,
                          obs_method=obs_method,
                          state_type=state_type,
                          max_day=max_day,
                          file_name=file_name,
                          verbose=0)
    env = DummyVecEnv([lambda: env])
    model = PPO2(MlpPolicy, env, gamma=GAMMA, verbose=0,
                 tensorboard_log="results/tensorboard_data/" + tblog + "/")  # create model

    # Train the model without evaluation (=faster)
    print('start phase 1, without evaluation')
    model.learn(total_timesteps=training_timesteps_list[0],
                tb_log_name=file_name)  # TB run: tensorboard --logdir ./tblog/

    # Export
    model.save('results/model/' + file_name)  # Save for backup

    callback_on_best = StopTrainingOnDecayingRewardThreshold(
        max_reward=max_reward,
        episode_decay=training_timesteps_list[2],
        reward_decay=0.05,
        no_reward_episodes=training_timesteps_list[0],
        verbose=1)  # Callback for evaluation
    eval_callback = EvalCallback(
        env,
        # callback_on_new_best=callback_on_best,
        best_model_save_path='results/model/' + file_name,
        eval_freq=50000,
        verbose=1,
        n_eval_episodes=5)

    # Train the model with eval every 50000 steps
    print('start phase 2 with evaluation')
    model.learn(total_timesteps=training_timesteps_list[1],
                tb_log_name=file_name,
                callback=eval_callback,
                reset_num_timesteps=False)  # train the model and run tensorboard 5000000 1500000

    # Export
    model.save('results/model/' + file_name + 'end')  # Save for backup

    # Extract the tensorboard data
    data_extract.extract_tb(file_name)

    return file_name
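# A minimal invocation sketch, illustrative only. The distribution, counts and tblog
# name below are hypothetical placeholders, and training_timesteps_list is assumed to
# carry a third entry (episode decay for the stop-training callback) based on how it
# is indexed above; the docstring itself only documents the first two entries.
#
#   supply = ({'O+': 0.5, 'A+': 0.5}, ['A', 'D'])   # placeholder prevalences/antigens
#   demand_dist = supply
#   file_name = train(supply, demand_dist, model_name='ppo2_rbc', demand=10,
#                     max_day=365, training_timesteps_list=[5000000, 1500000, 100],
#                     tblog='rbc_matching')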
def solve(supply_distribution: Tuple[dict, list],
          demand_distribution: Tuple[dict, list],
          model_name: str,
          export_model: str,
          max_age: int,
          demand: int,
          doi: int,
          n_warm_start_days: int,
          n_days: int,
          obs_method: int,
          state_type: str) -> dict:
    """
    :param demand_distribution: Tuple[dict, list] containing a dict with {blood_group: distribution}, list of included antigens
    :param supply_distribution: Tuple[dict, list] containing a dict with {blood_group: distribution}, list of included antigens
    :param model_name: str, name of the model that is used to store the results
    :param export_model: str, name of the model that is trained
    :param max_age: int, max age of the RBCs
    :param demand: int, number of demand / supply per day
    :param doi: days of inventory, the number of days the inventory is filled before first supply
    :param n_warm_start_days: int, number of days of warm start
    :param n_days: int, number of days for evaluation
    :param obs_method: int, 1 or 2: item requested one-hot-encoded (1) or binary (2)
    :param state_type: type of state that is used, 'custom_category'
    :return:
    """
    # Get model ready
    env = environment.Env(supply_distribution[0],
                          demand_distribution[0],
                          max_age,
                          demand,
                          doi,
                          obs_method=obs_method,
                          state_type=state_type,
                          file_name=model_name)
    env = DummyVecEnv([lambda: env])
    model = PPO2.load(export_model, env=env)

    # Run model
    obs = env.reset()

    # Warm start
    print('warm start - started')
    env.env_method('set_days', n_warm_start_days)
    done = False
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs_next, rewards, done, info = env.step(action)
        obs = obs_next
    print('warm start - ended')

    # Testing
    print('Testing - started')
    env.env_method('set_days', n_days)
    env.env_method('change_eval_boolean', True)
    done = False
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs_next, rewards, done, info = env.step(action)
        obs = obs_next
    results = env.env_method('render_blood_specific')  # get evaluation metrics
    print('Testing - ended')

    return results
def get_server():
    return server_auth(env.Env().get_server(),
                       env.Env().get_project(),
                       env.Env().get_user(),
                       env.Env().get_pass())