def log_training_start_information(self):
    text = ("\n Agent: {}\n".format(self.agent_name)
            + " ActionWrapper: {}\n".format(self.action_wrapper_name)
            + " StateBuilder: {}\n".format(self.state_builder_name)
            + " RewardBuilder: {}\n".format(self.reward_builder_name)
            + " Environment: {}\n".format(self.env_name)
            + " Model: {}\n".format(self.model_name))

    if hasattr(self.model, "lib"):
        if self.model.neural_net_class is not None:
            if self.model.lib == constants.Libraries.KERAS:
                stringlist = []
                self.model.dnn.model.summary(
                    print_fn=lambda x: stringlist.append(x))
                short_model_summary = "\n".join(stringlist)
                text += " " + short_model_summary
            if self.model.lib == constants.Libraries.PYTORCH:
                # str() is needed here: a torch module cannot be
                # concatenated to a string directly
                text += " " + str(self.model.dnn.model)
        else:
            for idx, layer in enumerate(self.model.build_model):
                text += " Layer {}: {}\n".format(idx, layer)
    else:
        for idx, layer in enumerate(self.model.build_model):
            text += " Layer {}: {}\n".format(idx, layer)

    self.training_report += text
    rp.report(text)
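# Aside on the Keras branch above: model.summary() prints to stdout by
# default, so a print_fn callback is used to collect the lines into a
# string instead; `keras_model` below is an illustrative name.
#
#     lines = []
#     keras_model.summary(print_fn=lines.append)
#     summary_text = "\n".join(lines)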
def save(self, savepath):
    '''
    This method saves the object as a pickle file,
    along with any extra data it needs.
    '''
    rp.report("Saving {} object...".format(self.__class__.__name__),
              verbosity_lvl=1)
    self.save_pickle(savepath)
    self.save_extra(savepath)
def ask_for_continue(self):
    if self.version != self.__curr_version:
        answer = ""
        while answer.lower() not in ("y", "n"):
            answer = rp.input(
                "The loaded training version is {} and the current"
                " version is {}. This mismatch may cause errors during"
                " training. Do you wish to continue? [y/n]".format(
                    self.version, self.__curr_version), "n")
        if answer.lower() == "n":
            rp.report("The training was stopped.")
            exit()
def get_sc2_reward(self, obs):
    build_supply_depot = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_SUPPLY_DEPOT
    build_barrack = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_BARRACK
    build_marine = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_MARINE
    do_nothing = BuildUnitsGeneralizedRewardBuilder.ACTION_DO_NOTHING

    current = self.get_sc2_number_of_supply_depot(obs)
    prev = self.get_sc2_number_of_supply_depot(self.previous_state)
    supply_depot_amount_diff = current - prev

    current = self.get_sc2_number_of_barracks(obs)
    prev = self.get_sc2_number_of_barracks(self.previous_state)
    barracks_amount_diff = current - prev

    current = self.get_sc2_number_of_marines(obs)
    prev = self.get_sc2_number_of_marines(self.previous_state)
    marines_amount_diff = current - prev

    # Penalize chosen actions that cannot succeed in the current state
    negative_rwd = 0
    chosen_action = BuildUnitsGeneralizedRewardBuilder.LAST_CHOSEN_ACTION
    if chosen_action > -1:
        supply_depot_amount = self.get_sc2_number_of_supply_depot(obs)
        barracks_amount = self.get_sc2_number_of_barracks(obs)
        minerals = obs.player.minerals
        if chosen_action == build_supply_depot:
            if supply_depot_amount > 7 or minerals < 100:
                negative_rwd = -10
        elif chosen_action == build_barrack:
            if supply_depot_amount <= 0 or minerals < 150:
                negative_rwd = -10
        elif chosen_action == build_marine:
            if barracks_amount <= 0 or minerals < 50:
                negative_rwd = -10
        #elif chosen_action == do_nothing:
        #    negative_rwd = -1

    rwd = (negative_rwd
           + supply_depot_amount_diff
           + barracks_amount_diff * 10
           + marines_amount_diff * 100)

    rp.report('''Calculated reward is: {}, composed of:
    supply_depot_amount: {},
    barracks_amount: {},
    marines_amount: {},
    negative_rwd: {}
    '''.format(rwd,
               supply_depot_amount_diff,
               barracks_amount_diff * 10,
               marines_amount_diff * 100,
               negative_rwd),
              verbosity_lvl=1)

    # A negative diff means units were lost; that is not rewarded or
    # punished here
    if supply_depot_amount_diff < 0 or barracks_amount_diff < 0 \
            or marines_amount_diff < 0:
        return 0
    return rwd
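# Worked example of the reward composition above (illustrative numbers):
# building 1 supply depot, 1 barrack, and 2 marines in a step where the
# chosen action was valid (negative_rwd = 0) gives
#     rwd = 0 + 1 + 1 * 10 + 2 * 100 = 211
# while an invalid build order alone gives rwd = -10.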
def __init__(self):
    conf = ConfigParser.ConfigParser()
    conf.read(CONFIG_PATH)
    self.es_url = conf.get("ES", "es_url")
    self.esindex_prefix = conf.get("ES", "esindex_prefix")
    self.data_type = conf.get("indices_module", "data_type")
    self.sampling_speed = int(conf.get("indices_module", "sampling_speed"))
    self.store_size_unit = conf.get("indices_module", "store_size_unit")
    self.indices_parser = IndicesParser()
    self.reporter = Reporter()
def __init__(self):
    conf = ConfigParser.ConfigParser()
    conf.read(CONFIG_PATH)
    self.es_url = conf.get("ES", "es_url")
    self.esindex_prefix = conf.get("ES", "esindex_prefix")
    self.data_type = conf.get("nodes_module", "data_type")
    self.sampling_speed = int(conf.get("nodes_module", "sampling_speed"))
    self.data_structure = conf.get("nodes_module", "data_structure")
    self.nodes_total_count = conf.get("nodes_module", "nodes_total_count")
    self.nodes_parser = NodesParser()
    self.es_template = EsTemplate()
    self.reporter = Reporter()
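# A minimal sketch of what CONFIG_PATH is expected to contain; the section
# and option names come from the two constructors above, while all values
# are illustrative assumptions:
#
#     [ES]
#     es_url = http://localhost:9200
#     esindex_prefix = monitoring-
#
#     [indices_module]
#     data_type = indices_stats
#     sampling_speed = 10
#     store_size_unit = mb
#
#     [nodes_module]
#     data_type = nodes_stats
#     sampling_speed = 10
#     data_structure = flat
#     nodes_total_count = 3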
def __init__(self, args, sess, model):
    """
    Call the constructor of the base class,
    init summaries,
    init loading data.
    :param args:
    :param sess:
    :param model:
    :return:
    """
    super().__init__(args, sess, model)

    # Init load data and generator
    self.generator = None
    self.run = None
    # Load data
    if self.args.data_mode == "realsense":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_realsence_data()
    elif self.args.data_mode == "cityscapes_val":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_val_data()
    elif self.args.data_mode == "cityscapes_test":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_test_data()
    elif self.args.data_mode == "video":
        self.test_data = None
        self.test_data_len = None
        self.num_iterations_testing_per_epoch = None
        self.load_vid_data()

    if self.args.task == "test":
        self.run = self.test
    elif self.args.task == "realsense":
        self.run = self.realsense_inference
    elif self.args.task == "realsense_imgs":
        self.run = self.realsense_imgs
    else:
        print("ERROR Please select a proper task BYE")
        exit(-1)

    # Init metrics class
    self.metrics = Metrics(self.args.num_classes)
    # Init reporter class
    self.reporter = Reporter(self.args.out_dir + 'report_test.json',
                             self.args)
def cycle_through_syncs():
    my_report = Reporter()
    start_time = datetime.datetime.now()
    my_report.append_to_report('INFO: cycle started at ' + str(start_time))

    # read the configuration file for usernames, passwords and other parameters
    config = readDictFile('oli.config')
    # take the survey id, sid, from the config, because it is used everywhere
    sid = int(config['sid'])

    # create a connection to the postgresql database
    conn = ConnToOliDB()
    my_report.append_to_report(conn.init_result)

    # initialize the oc-webservice
    myDataWS = dataWS(config['userName'], config['password'],
                      config['baseUrl'])

    # read the tokens in two pages of ten
    tokens = {}
    tokens_list = read_ls_tokens(config, 0, 10)
    for token in tokens_list:
        tokens[token['token']] = token['participant_info']['firstname']
    tokens_list = read_ls_tokens(config, 10, 10)
    for token in tokens_list:
        tokens[token['token']] = token['participant_info']['firstname']
    print(tokens)

    # close the file so we can send it
    my_report.close_file()
    MailThisLogFile('logs/report.txt')
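# A hedged sketch generalizing the two fixed pages above, assuming the
# second and third arguments of read_ls_tokens() are the page offset and
# page size, and that a call past the last token returns an empty list:
def read_all_tokens(config, page_size=10):
    tokens = {}
    start = 0
    while True:
        tokens_list = read_ls_tokens(config, start, page_size)
        if not tokens_list:
            break
        for token in tokens_list:
            tokens[token['token']] = token['participant_info']['firstname']
        start += page_size
    return tokens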
def load_pickle(self, persist_path):
    '''
    This method loads a list instance saved by pickle.
    '''
    # Check if the pickle file exists
    pickle_path = self.get_full_persistance_pickle_path(persist_path)
    exists_pickle = os.path.isfile(pickle_path)
    # If yes (and it is not empty), load it
    if exists_pickle:
        if os.path.getsize(pickle_path) > 0:
            with open(pickle_path, "rb") as pickle_in:
                pickle_dict = pickle.load(pickle_in)
                self.restore_pickleable_attributes(pickle_dict)
            rp.report(
                "**************************************** \n Pickle for "
                + self.get_default_save_stamp()
                + " loaded. \n****************************************", 1)
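# Minimal usage sketch pairing save() (shown earlier) with load_pickle();
# the "saves/run_01" path is illustrative:
#
#     obj.save("saves/run_01")         # writes the pickle plus extra data
#     obj.load_pickle("saves/run_01")  # restores the pickleable attributes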
def log_train_stats(self):
    if self.ep_count > 0:
        text = ("\n"
                + "Current Reward Avg.: {}".format(
                    sum(self.ep_rewards) / self.ep_count)
                + " Win rate: {:10.3f}%".format(
                    (sum(self.ep_victories) / self.ep_count) * 100)
                + " Avg number of steps: {}".format(
                    sum(self.ep_avg_steps) / self.ep_count)
                + " Training Duration (seconds): {}".format(
                    round(time() - self.training_start, 2))
                + "\n")
        self.training_report += text
        rp.report(text)
    else:
        rp.report("There are no recorded episodes!")
def step(self, action):
    if self.game == GeneralizedBuildUnitsScenario.GAME_DEEP_RTS:
        BuildUnitsGeneralizedRewardBuilder.LAST_CHOSEN_ACTION = action

        if self.steps == 0:
            self.setup_map()
            self.spawn_army()
        elif self.steps == 1:
            self.collect_gold()

        if rp.VERBOSITY_LEVEL > 0:
            str_ = '''DRTS Episode Status:
            Number of gold = {},
            Number of barracks = {},
            Number of farms = {},
            Number of soldiers = {}'''.format(
                self.env.players[0].gold,
                self.get_drts_unit_type_count(
                    0, self.env.constants.Unit.Barracks),
                self.get_drts_unit_type_count(
                    0, self.env.constants.Unit.Farm),
                self.get_drts_unit_type_count(
                    0, self.env.constants.Unit.Footman),
            )
            rp.report(str_, verbosity_lvl=1)

        state, reward, done = None, None, None
        # Build actions are applied directly on the scenario; the engine
        # itself is then advanced with a no-op (action id 15)
        no_action = 15
        if action == GeneralizedBuildUnitsScenario.ACTION_DRTS_DO_NOTHING:
            state, reward, done = self.env.step(no_action)
        elif action == GeneralizedBuildUnitsScenario.ACTION_DRTS_BUILD_FARM:
            self.build_farm()
            state, reward, done = self.env.step(no_action)
        elif action == GeneralizedBuildUnitsScenario.ACTION_DRTS_BUILD_BARRACK:
            self.build_barrack()
            state, reward, done = self.env.step(no_action)
        elif action == GeneralizedBuildUnitsScenario.ACTION_DRTS_BUILD_FOOTMAN:
            self.build_footman()
            state, reward, done = self.env.step(no_action)
        else:
            state, reward, done = self.env.step(action)

        self.steps += 1
        return state, reward, done
    elif self.game == GeneralizedBuildUnitsScenario.GAME_STARCRAFT_II:
        self.steps += 1
        return self.env.step(action)
def log_ep_stats(self):
    if self.ep_count > 0:
        agent_info = dict.fromkeys(self.agent_info)
        for key in agent_info:
            agent_info[key] = self.agent_info[key][-1]

        rp.report(
            "Episode: {}/{} | Outcome: {} | Episode Avg. Reward: {:10.6f}"
            " | Episode Reward: {:10.6f} | Episode Steps: {:10.6f}"
            " | Best Reward was {} on episode: {}"
            " | Episode Duration (seconds): {} | Episode SPS: {}"
            " | SPS AVG: {} | Agent info: {}".format(
                self.ep_count, self.ep_total, self.ep_victories[-1],
                self.ep_avg_rewards[-1], self.ep_rewards[-1],
                self.ep_steps_count[-1], self.best_reward,
                self.best_reward_episode, self.episode_duration_list[-1],
                self.episode_sps_list[-1], self.avg_sps_list[-1],
                agent_info))
    else:
        rp.report("There are no recorded episodes!")
def test_agent(self):
    # backup attributes
    max_test_episodes_backup = self.max_test_episodes
    curr_playing_episodes_backup = self.curr_playing_episodes
    logger_backup = self.logger
    #full_save_play_path_backup = self.full_save_play_path
    enable_save_backup = self.enable_save

    # set attributes to test the agent
    self.enable_save = False
    #self.full_save_play_path = self.full_save_path + os.path.sep + "inside_training_play_files" + os.path.sep + "test_at_training_episode_{}".format(self.curr_training_episodes)
    #self.make_persistance_dirs(self.log_actions)
    self.max_test_episodes = self.reward_test_number_of_episodes
    self.curr_playing_episodes = 0

    rp.report("> Starting to check current agent performance.")
    # make the agent play
    self.play()
    rp.report("> Finished checking current agent performance.")

    # get the reward average
    rwd_avg = self.logger.ep_avg_rewards[-1]

    # save this logger for later saving; this is needed to get some more
    # detailed info on tests
    logger_dict = {}
    logger_dict["logger"] = self.logger
    logger_dict["saved"] = False
    self.inside_training_test_loggers.append(logger_dict)

    # restore backup
    self.max_test_episodes = max_test_episodes_backup
    self.curr_playing_episodes = curr_playing_episodes_backup
    self.logger = logger_backup
    #self.full_save_play_path = full_save_play_path_backup
    self.enable_save = enable_save_backup

    # register the reward average
    self.logger.inside_training_test_avg_rwds.append(rwd_avg)
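# A hedged alternative to the manual backup/restore above: a small helper
# (hypothetical, not part of the original codebase) that restores the
# attributes even if play() raises mid-test.
from contextlib import contextmanager

@contextmanager
def attr_overrides(obj, **overrides):
    # remember current values, apply the overrides, and always restore
    saved = {name: getattr(obj, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(obj, name, value)
        yield obj
    finally:
        for name, value in saved.items():
            setattr(obj, name, value)

# Usage sketch:
#     with attr_overrides(self, enable_save=False,
#                         max_test_episodes=self.reward_test_number_of_episodes,
#                         curr_playing_episodes=0):
#         self.play()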
def training_loop(self, is_testing, reward_from_agent=True):
    start_time = time.time()
    #current_episodes = 0

    if is_testing:
        rp.report("\n\n> Playing")
        max_episodes = self.max_test_episodes
        max_steps = self.max_steps_testing
        current_episodes = self.curr_playing_episodes
    else:
        rp.report("> Training")
        max_episodes = self.max_training_episodes
        max_steps = self.max_steps_training
        current_episodes = self.curr_training_episodes

    if self.logger.ep_count == 0 or is_testing:
        self.logger = Logger(
            max_episodes,
            self.agent.__class__.__name__,
            self.agent.model.__class__.__name__,
            self.agent.model,
            self.agent.action_wrapper.__class__.__name__,
            self.agent.action_wrapper.get_action_space_dim(),
            self.agent.action_wrapper.get_named_actions(),
            self.agent.state_builder.__class__.__name__,
            self.agent.reward_builder.__class__.__name__,
            self.env.__class__.__name__,
            log_actions=self.log_actions,
            episode_batch_avg_calculation=self.episode_batch_avg_calculation,
            rolling_avg_window_size=self.rolling_avg_window_size)

    while current_episodes < max_episodes:
        current_episodes += 1
        self.env.start()

        if is_testing:
            self.curr_playing_episodes = current_episodes
        else:
            self.curr_training_episodes = current_episodes

        # Reset the environment
        obs = self.env.reset()
        step_reward = 0
        done = False

        # Passing the episode to the agent reset, so that it can be passed
        # to model reset, allowing the model to track the episode number
        # and decide if it should diminish the learning rate, depending on
        # the currently selected strategy.
        self.agent.reset(current_episodes)

        ep_reward = 0
        victory = False
        ep_actions = np.zeros(
            self.agent.action_wrapper.get_action_space_dim())
        self.logger.record_episode_start()

        for step in range(max_steps):
            # Choosing an action and passing it to our env.step() in order
            # to act on our environment
            action = self.agent.step(obs, done, is_testing)

            # Take the action (a) and observe the outcome state (s') and
            # reward (r)
            obs, default_reward, done = self.env.step(action)

            # Logic to test whether this is the last step of this episode
            is_last_step = step == max_steps - 1
            done = done or is_last_step

            # Checking whether or not to use the reward from the reward
            # builder so we can pass that to the agent
            if reward_from_agent:
                step_reward = self.agent.get_reward(obs, default_reward,
                                                    done)
            else:
                step_reward = default_reward

            # Making the agent learn
            if not is_testing:
                self.agent.learn(obs, step_reward, done)

            # Adding our step reward to the total count of the episode's
            # reward
            ep_reward += step_reward
            ep_actions[self.agent.previous_action] += 1

            if done:
                victory = default_reward == 1
                agent_info = {
                    "Learning rate": self.agent.model.learning_rate,
                    "Gamma": self.agent.model.gamma,
                    "Epsilon": self.agent.model.epsilon_greedy,
                }
                self.logger.record_episode(ep_reward, victory, step + 1,
                                           agent_info, ep_actions)
                break

        self.logger.log_ep_stats()

        # check if the user wants to pause training and test the agent
        #if self.do_reward_test and current_episodes % self.episode_batch_avg_calculation == 0 and current_episodes > 1:
        if (not is_testing) and self.do_reward_test \
                and current_episodes % self.episode_batch_avg_calculation == 0:
            self.test_agent()

        # if this is not a test (evaluation), saving is enabled and we are
        # at a multiple of our save_every variable, then we save the model
        # and generate graphs
        if (not is_testing) and self.enable_save and current_episodes > 0 \
                and current_episodes % self.save_every == 0:
            self.save(self.full_save_path)

            # if we have done tests along the training, save all loggers
            # for further detailed analysis
            if self.do_reward_test and len(
                    self.inside_training_test_loggers) > 0:
                for idx in range(
                        len(self.logger.ep_avg_batch_rewards_episodes)):
                    logger_dict = self.inside_training_test_loggers[idx]
                    if not logger_dict["saved"]:
                        episode = self.logger.ep_avg_batch_rewards_episodes[idx]
                        backup_full_save_path = self.full_save_path
                        self.full_save_path = (
                            self.full_save_path + os.path.sep
                            + "inside_training_play_files" + os.path.sep
                            + "test_at_training_episode_{}".format(episode))
                        self.make_persistance_dirs(self.log_actions)
                        logger_dict["logger"].save(self.full_save_path)
                        logger_dict["saved"] = True
                        self.full_save_path = backup_full_save_path

    end_time = time.time()
    if is_testing:
        rp.report("\n> Test duration: {} seconds".format(end_time
                                                         - start_time))
        self.logger.log_train_stats()
    else:
        rp.report("\n> Training duration: {} seconds".format(end_time
                                                             - start_time))
        self.logger.log_train_stats()

    # Saving the model at the end of the training loop
    if self.enable_save:
        if is_testing:
            self.logger.save(self.full_save_play_path)
            rp.save(self.full_save_play_path)
        else:
            self.save(self.full_save_path)

            # if we have done tests along the training, save all loggers
            # for further detailed analysis
            if self.do_reward_test and len(
                    self.inside_training_test_loggers) > 0:
                for idx in range(
                        len(self.logger.ep_avg_batch_rewards_episodes)):
                    logger_dict = self.inside_training_test_loggers[idx]
                    if not logger_dict["saved"]:
                        episode = self.logger.ep_avg_batch_rewards_episodes[idx]
                        backup_full_save_path = self.full_save_path
                        self.full_save_path = (
                            self.full_save_path + os.path.sep
                            + "inside_training_play_files" + os.path.sep
                            + "test_at_training_episode_{}".format(episode))
                        self.make_persistance_dirs(self.log_actions)
                        logger_dict["logger"].save(self.full_save_path)
                        logger_dict["saved"] = True
                        self.full_save_path = backup_full_save_path
def cycle_through_syncs():
    # we start by reading the config file and preparing the connections to
    # the databases
    my_report = Reporter()
    start_time = datetime.datetime.now()
    my_report.append_to_report('cycle started at ' + str(start_time))

    # read configuration file for usernames and passwords and other parameters
    config = readDictFile('odkoc.config')

    # initialise the oc-webservices
    myWebService = studySubjectWS(config['userName'], config['password'],
                                  config['baseUrl'])
    myEventWS = studyEventWS(config['userName'], config['password'],
                             config['baseUrl'])
    myDataWS = dataWS(config['userName'], config['password'],
                      config['baseUrl'])

    # create connections to the postgresql databases
    conn_util = ConnToOdkUtilDB()
    my_report.append_to_report('try to connect to util database, result: %s '
                               % conn_util.init_result)
    conn_odk = ConnToOdkDB()
    my_report.append_to_report('try to connect to odk database, result: %s '
                               % conn_odk.init_result)

    # our cycle starts here and ends at the break
    while True:
        ''' start with form READER '''
        # 1: start with retrieving the rows of odk-table HS_RDT_READER_1_V1_CORE
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"HS_RDT_READER_1_V1_CORE\"")
        # for the study subject id look in:
        # odk_result['GENERAL_INFORMATION_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result[
                'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if studysubjectid_oc == study_subject_id:
                    add_subject_to_db = False

            if add_subject_to_db:
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                #print(add_results)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                # TODO: add error-handling for fail of scheduling event
                # now add the combination id-oid to the util database,
                # but only add the pair if the oid starts with SS_
                if study_subject_oid.find('SS_') == 0:
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid
            # and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            # only import the data if this hasn't been done before
            if not conn_util.UriComplete(odk_result['_URI']):
                # now we should have the study subject id plus oid, so we
                # can compose the odm for import
                study_subject_id = odk_result[
                    'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id)
                complete_odm = compose_reader(study_subject_oid, odk_result)
                import_results = myDataWS.importData(complete_odm)
                # if our import was successful, then the result should
                # start with Success, and if so, we can mark this uri as
                # complete
                if import_results.find('Success') == 0:
                    conn_util.MarkUriComplete(odk_result['_URI'], 'reader')
                my_report.append_to_report('reader ' + study_subject_id
                                           + ': ' + import_results)

        ''' go on with form SCREENING '''
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"SCREEN19M__V3_CORE\"",
            'not \"INFORMED_CONSENT_STUDY_SUBJECT_ID\" is null')
        # for the study subject id look in:
        # odk_result['INFORMED_CONSENT_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result['INFORMED_CONSENT_STUDY_SUBJECT_ID']
            print(study_subject_id)
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if studysubjectid_oc == study_subject_id:
                    add_subject_to_db = False

            if add_subject_to_db:
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                # TODO: add error-handling for fail of scheduling event
                # now add the combination id-oid to the util database,
                # but only add the pair if the oid starts with SS_
                if study_subject_oid.find('SS_') == 0:
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid
            # and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            print('we have study subject id %s and oid %s'
                  % (study_subject_id,
                     conn_util.DLookup(
                         'study_subject_oid', 'odkoc.study_subject_oc',
                         'study_subject_id=\'%s\'' % study_subject_id)))
            print(odk_result['_URI'],
                  conn_util.UriComplete(odk_result['_URI']))

            # only import the data if this hasn't been done before
            if not conn_util.UriComplete(odk_result['_URI']):
                print('dive into it')
                # now we should have the study subject id plus oid, so we
                # can compose the odm for import
                study_subject_id = odk_result[
                    'INFORMED_CONSENT_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id)
                complete_odm = compose_screening(study_subject_oid,
                                                 odk_result)
                print(complete_odm)

                # we'll make an exception for I_MA006_OTHER_DISEASE_HX,
                # because this is a group of check-boxes; in complete_odm
                # we have a placeholder {OTHER_DISEASE_HX}
                parent_uri = odk_result['_URI']
                hx_results = conn_odk.ReadDataFromOdkTable(
                    "odk_prod.\"SCREEN19M__V3_MED_HISTO_CONCO_MED_OTHER_DISEASE_HX\"",
                    '\"_PARENT_AURI\"=\'%s\'' % parent_uri)
                other_disease_hx = ''
                for hx in hx_results:
                    other_disease_hx = other_disease_hx + hx['VALUE'] + ','
                if other_disease_hx != '':
                    # take off the last comma
                    other_disease_hx = other_disease_hx[:-1]
                # finally we can replace the placeholder with the actual values
                complete_odm = complete_odm.replace('{OTHER_DISEASE_HX}',
                                                    other_disease_hx)

                # import the odm data
                import_results = myDataWS.importData(complete_odm)
                if import_results.find('Success') != 0:
                    # if something went wrong, print it
                    print(import_results)
                    import_screening_core_success = False
                else:
                    # if our import was successful, then make a note of it
                    import_screening_core_success = True
                my_report.append_to_report('screening ' + study_subject_id
                                           + ': ' + import_results)

                ''' now we can look at the repeating item group for miscarriages '''
                odk_misca_results = conn_odk.ReadDataFromOdkTable(
                    "odk_prod.\"SCREEN19M__V3_OBSETRIC_INFO_EXAM_BABY_DIED\"",
                    '\"_PARENT_AURI\"=\'%s\'' % parent_uri)
                # initialise to True, so that zero miscarriage rows still
                # count as a successful import
                import_screening_misca_success = True
                for misca in odk_misca_results:
                    # print('misca ' + parent_uri + ' ' + misca['_URI'])
                    complete_odm = compose_misca(study_subject_oid, misca)
                    import_results = myDataWS.importData(complete_odm)
                    if import_results.find('Success') != 0:
                        # if something went wrong, print it
                        print(import_results)
                        import_screening_misca_success = False
                    else:
                        # if our import was successful, then make a note of it
                        import_screening_misca_success = True
                    my_report.append_to_report('misca ' + study_subject_id
                                               + ': ' + import_results)

                # now do the bookkeeping
                if (import_screening_core_success
                        and import_screening_misca_success):
                    conn_util.MarkUriComplete(odk_result['_URI'],
                                              'screening')

        ''' go on with form LAMP '''
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"LAMP_TESTING_V1_CORE\"",
            'not \"GENERAL_INFORMATION_STUDY_SUBJECT_ID\" is null')
        # for the study subject id look in:
        # odk_result['GENERAL_INFORMATION_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result[
                'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if studysubjectid_oc == study_subject_id:
                    add_subject_to_db = False

            if add_subject_to_db:
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                # TODO: add error-handling for fail of scheduling event
                # now add the combination id-oid to the util database,
                # but only add the pair if the oid starts with SS_
                if study_subject_oid.find('SS_') == 0:
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid
            # and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            # only import the data if this hasn't been done before
            if not conn_util.UriComplete(odk_result['_URI']):
                # now we should have the study subject id plus oid, so we
                # can compose the odm for import
                study_subject_id = odk_result[
                    'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % study_subject_id)
                complete_odm = compose_lamp(study_subject_oid, odk_result)
                import_results = myDataWS.importData(complete_odm)
                if import_results.find('Success') != 0:
                    # if something went wrong, print it
                    print(import_results)
                    import_lamp_success = False
                else:
                    # if our import was successful, then make a note of it
                    import_lamp_success = True
                my_report.append_to_report('lamp ' + study_subject_id
                                           + ': ' + import_results)
                if import_lamp_success:
                    conn_util.MarkUriComplete(odk_result['_URI'], 'lamp')

        # some bookkeeping to check if we must continue looping, or break
        # the loop; first sleep a bit, so we do not eat up all CPU
        time.sleep(int(config['sleep_this_long']))
        current_time = datetime.datetime.now()
        difference = current_time - start_time
        loop_this_long = config['loop_this_long']
        max_diff_list = loop_this_long.split(sep=':')
        max_difference = datetime.timedelta(hours=int(max_diff_list[0]),
                                            minutes=int(max_diff_list[1]),
                                            seconds=int(max_diff_list[2]))
        if difference > max_difference:
            break

    my_report.append_to_report('finished looping from %s till %s.'
                               % (start_time, current_time))
    # close the file so we can send it
    my_report.close_file()
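# Worked example of the loop-duration bookkeeping above: with
# loop_this_long = '02:30:00' in odkoc.config, max_diff_list becomes
# ['02', '30', '00'] and max_difference becomes
# datetime.timedelta(hours=2, minutes=30, seconds=0), so the while-loop
# breaks once the cycle has run for more than two and a half hours.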
def old_train(self, test_params: TestParams = None, reward_from_agent=True):
    start_time = time.time()
    rp.report("> Training")

    if self.logger.ep_count == 0:
        self.logger = Logger(
            self.max_training_episodes,
            self.agent.__class__.__name__,
            self.agent.model.__class__.__name__,
            self.agent.model,
            self.agent.action_wrapper.__class__.__name__,
            self.agent.action_wrapper.get_action_space_dim(),
            self.agent.action_wrapper.get_named_actions(),
            self.agent.state_builder.__class__.__name__,
            self.agent.reward_builder.__class__.__name__,
            self.env.__class__.__name__,
            log_actions=self.log_actions,
            episode_batch_avg_calculation=self.episode_batch_avg_calculation,
            rolling_avg_window_size=self.rolling_avg_window_size)

    if test_params is not None:
        test_params.logger = self.logger

    while self.curr_training_episodes < self.max_training_episodes:
        self.curr_training_episodes += 1
        self.env.start()

        # Reset the environment
        obs = self.env.reset()
        step_reward = 0
        done = False

        # Passing the episode to the agent reset, so that it can be passed
        # to model reset, allowing the model to track the episode number
        # and decide if it should diminish the learning rate, depending on
        # the currently selected strategy.
        self.agent.reset(self.curr_training_episodes)

        ep_reward = 0
        victory = False
        ep_actions = np.zeros(
            self.agent.action_wrapper.get_action_space_dim())
        self.logger.record_episode_start()

        for step in range(self.max_steps_training):
            # Choosing an action and passing it to our env.step() in order
            # to act on our environment
            action = self.agent.step(obs, done, is_testing=False)
            obs, default_reward, done = self.env.step(action)

            is_last_step = step == self.max_steps_training - 1
            done = done or is_last_step

            # Checking whether or not to use the reward from the reward
            # builder so we can pass that to the agent
            if reward_from_agent:
                step_reward = self.agent.get_reward(obs, default_reward,
                                                    done)
            else:
                step_reward = default_reward

            # Making the agent learn
            self.agent.learn(obs, step_reward, done)

            # Adding our step reward to the total count of the episode's
            # reward
            ep_reward += step_reward
            ep_actions[self.agent.previous_action] += 1

            if done:
                victory = default_reward == 1
                agent_info = {
                    "Learning rate": self.agent.model.learning_rate,
                    "Gamma": self.agent.model.gamma,
                    "Epsilon": self.agent.model.epsilon_greedy,
                }
                self.logger.record_episode(ep_reward, victory, step + 1,
                                           agent_info, ep_actions)
                break

        self.logger.log_ep_stats()

        # check if the user wants to pause training and test the agent
        #if self.do_reward_test and self.curr_training_episodes % self.episode_batch_avg_calculation == 0 and self.curr_training_episodes > 1:
        if self.do_reward_test \
                and self.curr_training_episodes % self.episode_batch_avg_calculation == 0:
            self.test_agent()

        if self.enable_save and self.curr_training_episodes > 0 \
                and self.curr_training_episodes % self.save_every == 0:
            self.save(self.full_save_path)

            # if we have done tests along the training, save all loggers
            # for further detailed analysis. This was needed because the
            # play() method was saving these loggers every test, slowing
            # down training a lot. Putting this code here allows us to
            # save them once and optimize training time.
            if self.do_reward_test and len(
                    self.inside_training_test_loggers) > 0:
                for idx in range(
                        len(self.logger.ep_avg_batch_rewards_episodes)):
                    logger_dict = self.inside_training_test_loggers[idx]
                    if not logger_dict["saved"]:
                        episode = self.logger.ep_avg_batch_rewards_episodes[idx]
                        backup_full_save_path = self.full_save_path
                        self.full_save_path = (
                            self.full_save_path + os.path.sep
                            + "inside_training_play_files" + os.path.sep
                            + "test_at_training_episode_{}".format(episode))
                        self.make_persistance_dirs(self.log_actions)
                        logger_dict["logger"].save(self.full_save_path)
                        logger_dict["saved"] = True
                        self.full_save_path = backup_full_save_path

        if test_params is not None \
                and self.curr_training_episodes % test_params.test_steps == 0 \
                and self.curr_training_episodes != 0:
            test_params.current_ep_count = self.curr_training_episodes
            self.play(test_params.num_matches, test_params.max_steps,
                      test_params)

            # Stops training if the reward threshold was reached in play
            # testing
            if test_params.reward_threshold is not None \
                    and test_params.reward_threshold <= test_params.logger.play_rewards_avg[-1]:
                rp.report("> Reward threshold was reached!")
                rp.report("> Stopping training")
                break

    end_time = time.time()
    rp.report("\n> Training duration: {} seconds".format(end_time
                                                         - start_time))
    self.logger.log_train_stats()
    self.logger.plot_train_stats()

    # Saving the model when the training has ended
    if self.enable_save:
        self.save(self.full_save_path)

        # if we have done tests along the training, save all loggers for
        # further detailed analysis (see the note above on why this is
        # done here rather than in play()).
        if self.do_reward_test and len(
                self.inside_training_test_loggers) > 0:
            for idx in range(
                    len(self.logger.ep_avg_batch_rewards_episodes)):
                logger_dict = self.inside_training_test_loggers[idx]
                if not logger_dict["saved"]:
                    episode = self.logger.ep_avg_batch_rewards_episodes[idx]
                    backup_full_save_path = self.full_save_path
                    self.full_save_path = (
                        self.full_save_path + os.path.sep
                        + "inside_training_play_files" + os.path.sep
                        + "test_at_training_episode_{}".format(episode))
                    self.make_persistance_dirs(self.log_actions)
                    logger_dict["logger"].save(self.full_save_path)
                    logger_dict["saved"] = True
                    self.full_save_path = backup_full_save_path
def old_play(self, test_params=None, reward_from_agent=True):
    rp.report("\n\n> Playing")

    self.logger = Logger(
        self.max_test_episodes,
        self.agent.__class__.__name__,
        self.agent.model.__class__.__name__,
        self.agent.model,
        self.agent.action_wrapper.__class__.__name__,
        self.agent.action_wrapper.get_action_space_dim(),
        self.agent.action_wrapper.get_named_actions(),
        self.agent.state_builder.__class__.__name__,
        self.agent.reward_builder.__class__.__name__,
        self.env.__class__.__name__,
        log_actions=self.log_actions,
        episode_batch_avg_calculation=self.episode_batch_avg_calculation,
        rolling_avg_window_size=self.rolling_avg_window_size)

    while self.curr_playing_episodes < self.max_test_episodes:
        self.curr_playing_episodes += 1
        self.env.start()

        # Reset the environment
        obs = self.env.reset()
        step_reward = 0
        done = False

        # Passing the episode to the agent reset, so that it can be passed
        # to model reset, allowing the model to track the episode number
        # and decide if it should diminish the learning rate, depending on
        # the currently selected strategy.
        self.agent.reset(self.curr_playing_episodes)

        ep_reward = 0
        victory = False
        ep_actions = np.zeros(
            self.agent.action_wrapper.get_action_space_dim())
        self.logger.record_episode_start()

        for step in range(self.max_steps_testing):
            action = self.agent.step(obs, done, is_testing=True)
            # Take the action (a) and observe the outcome state (s') and
            # reward (r)
            obs, default_reward, done = self.env.step(action)

            is_last_step = step == self.max_steps_testing - 1
            done = done or is_last_step

            if reward_from_agent:
                step_reward = self.agent.get_reward(obs, default_reward,
                                                    done)
            else:
                step_reward = default_reward

            ep_reward += step_reward
            ep_actions[self.agent.previous_action] += 1

            # If done: finish episode
            if done:
                victory = default_reward == 1
                agent_info = {
                    "Learning rate": self.agent.model.learning_rate,
                    "Gamma": self.agent.model.gamma,
                    "Epsilon": self.agent.model.epsilon_greedy,
                }
                self.logger.record_episode(ep_reward, victory, step + 1,
                                           agent_info, ep_actions)
                break

        self.logger.log_ep_stats()

    if test_params is not None:
        test_params.logger.record_play_test(test_params.current_ep_count,
                                            self.logger.ep_rewards,
                                            self.logger.victories,
                                            self.max_test_episodes)
    else:
        # Only log train stats if this is not a test, to avoid cluttering
        # the interface with info
        self.logger.log_train_stats()

    # We need to save the playing status as well
    if self.enable_save:
        self.logger.save(self.full_save_play_path)
        rp.save(self.full_save_play_path)
#!/usr/bin/env python3
from utils.reporter import Reporter

if __name__ == '__main__':
    reporter = Reporter()
    reporter.report_test_success()
#!/usr/bin/env python3
from utils.reporter import Reporter

if __name__ == '__main__':
    reporter = Reporter()
    reporter.report_release_success()
class Train(BasicTrain):
    """
    Trainer class
    """

    def __init__(self, args, sess, train_model, test_model):
        """
        Call the constructor of the base class,
        init summaries,
        init loading data.
        :param args:
        :param sess:
        :param train_model:
        :param test_model:
        :return:
        """
        super().__init__(args, sess, train_model, test_model)
        ##################################################################################
        # Init summaries

        # Summary variables
        self.scalar_summary_tags = ['mean_iou_on_val',
                                    'train-loss-per-epoch',
                                    'val-loss-per-epoch',
                                    'train-acc-per-epoch',
                                    'val-acc-per-epoch']
        self.images_summary_tags = [
            ('train_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3])]

        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and their operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir,
                                                    self.sess.graph)
        ##################################################################################
        # Init load data and generator
        self.generator = None
        if self.args.data_mode == "experiment_tfdata":
            self.data_session = None
            self.train_next_batch, self.train_data_len = self.init_tfdata(
                self.args.batch_size, self.args.abs_data_dir,
                (self.args.img_height, self.args.img_width), mode='train')
            self.num_iterations_training_per_epoch = \
                self.train_data_len // self.args.batch_size
            self.generator = self.train_tfdata_generator
        elif self.args.data_mode == "experiment_h5":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data_h5()
            self.generator = self.train_h5_generator
        elif self.args.data_mode == "experiment_v2":
            self.targets_resize = self.args.targets_resize
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data(v2=True)
            self.generator = self.train_generator
        elif self.args.data_mode == "experiment":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data()
            self.generator = self.train_generator
        elif self.args.data_mode == "test_tfdata":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_tfdata_generator
        elif self.args.data_mode == "test":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_eval":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.names_mapper = None
            self.load_test_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_v2":
            self.targets_resize = self.args.targets_resize
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data(v2=True)
            self.generator = self.test_generator
        elif self.args.data_mode == "video":
            self.args.data_mode = "test"
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_vid_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "debug":
            print("Debugging photo loading..")
            # self.debug_x= misc.imread('/data/menna/cityscapes/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png')
            # self.debug_y= misc.imread('/data/menna/cityscapes/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png')
            # self.debug_x= np.expand_dims(misc.imresize(self.debug_x, (512,1024)), axis=0)
            # self.debug_y= np.expand_dims(misc.imresize(self.debug_y, (512,1024)), axis=0)
            self.debug_x = np.load('data/debug/debug_x.npy')
            self.debug_y = np.load('data/debug/debug_y.npy')
            print("Debugging photo loaded")
        else:
            print("ERROR Please select a proper data_mode BYE")
            exit(-1)
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class
        if self.args.mode in ('train', 'overfit'):
            self.reporter = Reporter(self.args.out_dir + 'report_train.json',
                                     self.args)
        elif self.args.mode == 'test':
            self.reporter = Reporter(self.args.out_dir + 'report_test.json',
                                     self.args)
        ##################################################################################

    def crop(self):
        sh = self.val_data['X'].shape
        temp_val_data = {
            'X': np.zeros((sh[0] * 2, sh[1], sh[2] // 2, sh[3]),
                          self.val_data['X'].dtype),
            'Y': np.zeros((sh[0] * 2, sh[1], sh[2] // 2),
                          self.val_data['Y'].dtype)}
        for i in range(sh[0]):
            temp_val_data['X'][i * 2, :, :, :] = \
                self.val_data['X'][i, :, :sh[2] // 2, :]
            temp_val_data['X'][i * 2 + 1, :, :, :] = \
                self.val_data['X'][i, :, sh[2] // 2:, :]
            temp_val_data['Y'][i * 2, :, :] = \
                self.val_data['Y'][i, :, :sh[2] // 2]
            temp_val_data['Y'][i * 2 + 1, :, :] = \
                self.val_data['Y'][i, :, sh[2] // 2:]
        self.val_data = temp_val_data

    def init_tfdata(self, batch_size, main_dir, resize_shape, mode='train'):
        self.data_session = tf.Session()
        print("Creating the iterator for training data")
        with tf.device('/cpu:0'):
            segdl = SegDataLoader(main_dir, batch_size,
                                  (resize_shape[0], resize_shape[1]),
                                  resize_shape,  # * 2), resize_shape,
                                  'data/cityscapes_tfdata/train.txt')
            iterator = Iterator.from_structure(segdl.data_tr.output_types,
                                               segdl.data_tr.output_shapes)
            next_batch = iterator.get_next()

            self.init_op = iterator.make_initializer(segdl.data_tr)
            self.data_session.run(self.init_op)

        print("Loading Validation data in memory for faster training..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        # self.crop()
        # import cv2
        # cv2.imshow('crop1', self.val_data['X'][0,:,:,:])
        # cv2.imshow('crop2', self.val_data['X'][1,:,:,:])
        # cv2.imshow('seg1', self.val_data['Y'][0,:,:])
        # cv2.imshow('seg2', self.val_data['Y'][1,:,:])
        # cv2.waitKey()

        self.val_data_len = (self.val_data['X'].shape[0]
                             - self.val_data['X'].shape[0] % self.args.batch_size)
        # self.num_iterations_validation_per_epoch = (
        #     self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        self.num_iterations_validation_per_epoch = \
            self.val_data_len // self.args.batch_size

        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " "
              + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- "
              + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

        return next_batch, segdl.data_len

    @timeit
    def load_overfit_data(self):
        print("Loading data..")
        self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"),
                           'Y': np.load(self.args.data_dir + "Y_train.npy")}
        self.train_data_len = (self.train_data['X'].shape[0]
                               - self.train_data['X'].shape[0] % self.args.batch_size)
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations in one epoch -- "
              + str(self.num_iterations_training_per_epoch))
        print("Overfitting data is loaded")

        print("Loading Validation data..")
        self.val_data = self.train_data
        self.val_data_len = (self.val_data['X'].shape[0]
                             - self.val_data['X'].shape[0] % self.args.batch_size)
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " "
              + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- "
              + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    def overfit_generator(self):
        start = 0
        new_epoch_flag = True
        idx = None
        while True:
            # init the index array if it is a new epoch
            if new_epoch_flag:
                if self.args.shuffle:
                    idx = np.random.choice(self.train_data_len,
                                           self.train_data_len,
                                           replace=False)
                else:
                    idx = np.arange(self.train_data_len)
                new_epoch_flag = False

            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][mask]
            y_batch = self.train_data['Y'][mask]

            start += self.args.batch_size
            if start >= self.train_data_len:
                start = 0
                new_epoch_flag = True

            yield x_batch, y_batch

    def init_summaries(self):
        """
        Create the summary part of the graph
        :return:
        """
        with tf.variable_scope('train-summary-per-epoch'):
            for tag in self.scalar_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder(
                    'float32', None, name=tag)
                self.summary_ops[tag] = tf.summary.scalar(
                    tag, self.summary_placeholders[tag])
            for tag, shape in self.images_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder(
                    'float32', shape, name=tag)
                self.summary_ops[tag] = tf.summary.image(
                    tag, self.summary_placeholders[tag], max_outputs=10)

    def add_summary(self, step, summaries_dict=None, summaries_merged=None):
        """
        Add the summaries to tensorboard
        :param step:
        :param summaries_dict:
        :param summaries_merged:
        :return:
        """
        if summaries_dict is not None:
            summary_list = self.sess.run(
                [self.summary_ops[tag] for tag in summaries_dict.keys()],
                {self.summary_placeholders[tag]: value
                 for tag, value in summaries_dict.items()})
            for summary in summary_list:
                self.summary_writer.add_summary(summary, step)
        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)

    @timeit
    def load_train_data(self, v2=False):
        print("Loading Training data..")
        self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"),
                           'Y': np.load(self.args.data_dir + "Y_train.npy")}
        self.train_data = self.resize(self.train_data)
        if v2:
            out_shape = (self.train_data['Y'].shape[1] // self.targets_resize,
                         self.train_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.train_data['Y'].shape[0], out_shape[0],
                           out_shape[1]), dtype=self.train_data['Y'].dtype)
            for y in range(self.train_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.train_data['Y'][y, ...],
                                           out_shape, interp='nearest')
            self.train_data['Y'] = yy

        self.train_data_len = self.train_data['X'].shape[0]
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " "
              + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- "
              + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.val_data['Y_large'] = self.val_data['Y']
        if v2:
            out_shape = (self.val_data['Y'].shape[1] // self.targets_resize,
                         self.val_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.val_data['Y'].shape[0], out_shape[0],
                           out_shape[1]), dtype=self.train_data['Y'].dtype)
            for y in range(self.val_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.val_data['Y'][y, ...],
                                           out_shape, interp='nearest')
            self.val_data['Y'] = yy

        self.val_data_len = (self.val_data['X'].shape[0]
                             - self.val_data['X'].shape[0] % self.args.batch_size)
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " "
              + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- "
              + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_train_data_h5(self):
        print("Loading Training data..")
        self.train_data = h5py.File(
            self.args.data_dir + self.args.h5_train_file, 'r')
        self.train_data_len = self.args.h5_train_len
        self.num_iterations_training_per_epoch = (
            self.train_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " "
              + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- "
              + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.val_data_len = (self.val_data['X'].shape[0]
                             - self.val_data['X'].shape[0] % self.args.batch_size)
        self.num_iterations_validation_per_epoch = (
            self.val_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " "
              + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- "
              + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_vid_data(self):
        print("Loading Video data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_vid.npy")}
        self.test_data['Y'] = np.zeros(self.test_data['X'].shape[:3])
        self.test_data_len = self.test_data['X'].shape[0]
        print("Vid-shape-x -- " + str(self.test_data['X'].shape))
        print("Vid-shape-y -- " + str(self.test_data['Y'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Video data is loaded")

    @timeit
    def load_val_data(self, v2=False):
        print("Loading Validation data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                          'Y': np.load(self.args.data_dir + "Y_val.npy")}
        self.test_data = self.resize(self.test_data)
        self.test_data['Y_large'] = self.test_data['Y']
        if v2:
            out_shape = (self.test_data['Y'].shape[1] // self.targets_resize,
                         self.test_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.test_data['Y'].shape[0], out_shape[0],
                           out_shape[1]), dtype=self.test_data['Y'].dtype)
            for y in range(self.test_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.test_data['Y'][y, ...],
                                           out_shape, interp='nearest')
            self.test_data['Y'] = yy

        self.test_data_len = (self.test_data['X'].shape[0]
                              - self.test_data['X'].shape[0] % self.args.batch_size)
        print("Validation-shape-x -- " + str(self.test_data['X'].shape))
        print("Validation-shape-y -- " + str(self.test_data['Y'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Validation data is loaded")

    @timeit
    def load_test_data(self):
        print("Loading Testing data..")
        self.test_data = {'X': np.load(self.args.data_dir + "X_test.npy")}
        self.names_mapper = {'X': np.load(self.args.data_dir + "xnames_test.npy"),
                             'Y': np.load(self.args.data_dir + "ynames_test.npy")}
        self.test_data_len = (self.test_data['X'].shape[0]
                              - self.test_data['X'].shape[0] % self.args.batch_size)
        print("Test-shape-x -- " + str(self.test_data['X'].shape))
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + self.args.batch_size - 1) // self.args.batch_size
        print("Test data is loaded")

    def test_generator(self):
        start = 0
        new_epoch_flag = True
        idx = None
        while True:
            # init the index array if it is a new epoch
            if new_epoch_flag:
                if self.args.shuffle:
                    idx = np.random.choice(self.test_data_len,
                                           self.test_data_len,
                                           replace=False)
                else:
                    idx = np.arange(self.test_data_len)
                new_epoch_flag = False

            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.test_data['X'][mask]
            y_batch = self.test_data['Y'][mask]

            # update the start idx
            start += self.args.batch_size
            if start >= self.test_data_len:
                start = 0
                new_epoch_flag = True

            yield x_batch, y_batch

    def train_generator(self):
        start = 0
        idx = np.random.choice(self.train_data_len,
                               self.num_iterations_training_per_epoch
                               * self.args.batch_size,
                               replace=True)
        while True:
            # select the mini_batches
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][mask]
            y_batch = self.train_data['Y'][mask]

            # update the start idx
            start += self.args.batch_size

            yield x_batch, y_batch

            if start >= self.train_data_len:
                return

    def train_tfdata_generator(self):
        with tf.device('/cpu:0'):
            while True:
                x_batch, y_batch = self.data_session.run(
                    self.train_next_batch)
                yield x_batch, y_batch[:, :, :, 0]

    def train_h5_generator(self):
        start = 0
        idx = np.random.choice(self.train_data_len, self.train_data_len,
                               replace=False)
        while True:
            # select the mini_batches; h5py requires the fancy indices in
            # increasing order
            mask = idx[start:start + self.args.batch_size]
            x_batch = self.train_data['X'][sorted(mask.tolist())]
            y_batch = self.train_data['Y'][sorted(mask.tolist())]

            # update the start idx
            start += self.args.batch_size
            if start >= self.train_data_len:
                return

            yield x_batch, y_batch

    def resize(self, data):
        X = []
        Y = []
        for i in range(data['X'].shape[0]):
            X.append(misc.imresize(data['X'][i, ...],
                                   (self.args.img_height,
                                    self.args.img_width)))
            Y.append(misc.imresize(data['Y'][i, ...],
                                   (self.args.img_height,
                                    self.args.img_width), 'nearest'))
        data['X'] = np.asarray(X)
        data['Y'] = np.asarray(Y)
        return data

    def train(self):
        print("Training mode will begin NOW ..")
        # curr_lr = self.model.args.learning_rate
        for cur_epoch in range(
                self.model.global_epoch_tensor.eval(self.sess) + 1,
                self.args.num_epochs + 1, 1):

            # init tqdm and get the epoch value
            tt = tqdm(self.generator(),
                      total=self.num_iterations_training_per_epoch,
                      desc="epoch-" + str(cur_epoch) + "-")

            # init the current iterations
            cur_iteration = 0
            # init acc and loss lists
            loss_list = []
            acc_list = []

            # loop by the number of iterations
            for x_batch, y_batch in tt:
                # get the cur_it for the summary
                cur_it = self.model.global_step_tensor.eval(self.sess)

                # Feed these variables to the network
                feed_dict = {self.model.x_pl: x_batch,
                             self.model.y_pl: y_batch,
                             self.model.is_training: True
                             # self.model.curr_learning_rate: curr_lr
                             }

                # Run the feed forward, but on the last iteration finalize
                # what you want to do
                if cur_iteration < self.num_iterations_training_per_epoch - 1:
                    # run the feed_forward
                    _, loss, acc, summaries_merged = self.sess.run(
                        [self.model.train_op, self.model.loss,
                         self.model.accuracy, self.model.merged_summaries],
                        feed_dict=feed_dict)
                    # log loss and acc
                    loss_list += [loss]
                    acc_list += [acc]
                    # summarize
                    # self.add_summary(cur_it, summaries_merged=summaries_merged)
                else:
                    # run the feed_forward
                    if self.args.data_mode == 'experiment_v2':
                        _, loss, acc, summaries_merged = self.sess.run(
                            [self.model.train_op, self.model.loss,
                             self.model.accuracy,
                             self.model.merged_summaries],
                            feed_dict=feed_dict)
                    else:
                        _, loss, acc, summaries_merged, segmented_imgs = \
                            self.sess.run(
                                [self.model.train_op, self.model.loss,
                                 self.model.accuracy,
                                 self.model.merged_summaries,
                                 self.model.segmented_summary],
                                feed_dict=feed_dict)
                    # log loss and acc
                    loss_list += [loss]
                    acc_list += [acc]
                    total_loss = np.mean(loss_list)
                    total_acc = np.mean(acc_list)
                    # summarize
                    summaries_dict = dict()
                    summaries_dict['train-loss-per-epoch'] = total_loss
                    summaries_dict['train-acc-per-epoch'] = total_acc
                    if self.args.data_mode != 'experiment_v2':
                        summaries_dict['train_prediction_sample'] = segmented_imgs
                    # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged)

                    # report
                    self.reporter.report_experiment_statistics(
                        'train-acc', 'epoch-' + str(cur_epoch),
                        str(total_acc))
                    self.reporter.report_experiment_statistics(
                        'train-loss', 'epoch-' + str(cur_epoch),
                        str(total_loss))
                    self.reporter.finalize()

                    # Update the Global step
                    self.model.global_step_assign_op.eval(
                        session=self.sess,
                        feed_dict={self.model.global_step_input: cur_it + 1})
                    # Update the Cur Epoch tensor; it is the last thing,
                    # because if the run is interrupted this epoch is
                    # repeated
                    self.model.global_epoch_assign_op.eval(
                        session=self.sess,
                        feed_dict={self.model.global_epoch_input: cur_epoch + 1})

                    # print in console
                    tt.close()
                    print("epoch-" + str(cur_epoch) + "-"
                          + "loss:" + str(total_loss) + "-"
                          + " acc:" + str(total_acc)[:6])

                    # Break the loop to finalize this epoch
                    break

                # Update the Global step
                self.model.global_step_assign_op.eval(
                    session=self.sess,
                    feed_dict={self.model.global_step_input: cur_it + 1})

                # update the cur_iteration
                cur_iteration += 1

            # Save the current checkpoint
            if cur_epoch % self.args.save_every == 0:
                self.save_model()

            # Test the model on validation
            if cur_epoch % self.args.test_every == 0:
                self.test_per_epoch(
                    step=self.model.global_step_tensor.eval(self.sess),
                    epoch=self.model.global_epoch_tensor.eval(self.sess))

            # if cur_epoch % self.args.learning_decay_every == 0:
            #     curr_lr = curr_lr * self.args.learning_decay
            #     print('Current learning rate is ', curr_lr)

        print("Training Finished")

    def test_per_epoch(self, step, epoch):
        print("Validation at step:" + str(step) + " at epoch:" + str(epoch)
              + " ..")
        # init tqdm and get the
epoch value tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch, desc="Val-epoch-" + str(epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] inf_list = [] # idx of minibatch idx = 0 # reset metrics self.metrics.reset() # get the maximum iou to compare with and save the best model max_iou = self.model.best_iou_tensor.eval(self.sess) # loop by the number of iterations for cur_iteration in tt: # load minibatches x_batch = self.val_data['X'][idx:idx + self.args.batch_size] y_batch = self.val_data['Y'][idx:idx + self.args.batch_size] if self.args.data_mode == 'experiment_v2': y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size] # update idx of minibatch idx += self.args.batch_size # Feed this variables to the network feed_dict = {self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_validation_per_epoch - 1: start = time.time() # run the feed_forward out_argmax, loss, acc, summaries_merged = self.sess.run( [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] if self.args.data_mode == 'experiment_v2': yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]), dtype=np.uint32) out_argmax = np.asarray(out_argmax, dtype=np.uint8) for y in range(out_argmax.shape[0]): yy[y, ...] = misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest') y_batch = y_batch_large out_argmax = yy # log metrics self.metrics.update_metrics_batch(out_argmax, y_batch) else: start = time.time() # run the feed_forward if self.args.data_mode == 'experiment_v2': # Issues in concatenating gt and img with diff sizes now for segmented_imgs out_argmax, acc = self.sess.run( [self.test_model.out_argmax, self.test_model.accuracy], feed_dict=feed_dict) else: out_argmax, acc, segmented_imgs = self.sess.run( [self.test_model.out_argmax, self.test_model.accuracy, self.test_model.segmented_summary], feed_dict=feed_dict) end = time.time() # log loss and acc acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, y_batch) # mean over batches total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch) mean_iou_arr = self.metrics.iou mean_inference = str(np.mean(inf_list)) + '-seconds' # summarize summaries_dict = dict() summaries_dict['val-acc-per-epoch'] = total_acc summaries_dict['mean_iou_on_val'] = mean_iou if self.args.data_mode != 'experiment_v2': # Issues in concatenating gt and img with diff sizes now for segmented_imgs summaries_dict['val_prediction_sample'] = segmented_imgs # self.add_summary(step, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc)) self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch), str(mean_inference)) self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr) self.reporter.finalize() # print in console tt.close() print("Val-epoch-" + str(epoch) + "-" + "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) print("Last_max_iou: " + str(max_iou)) if mean_iou > 
max_iou: print("This validation got a new best iou. so we will save this one") # save the best model self.save_best_model() # Set the new maximum self.model.best_iou_assign_op.eval(session=self.sess, feed_dict={self.model.best_iou_input: mean_iou}) else: print("hmm not the best validation epoch :/..") break # Break the loop to finalize this epoch def linknet_postprocess(self, gt): gt2 = gt - 1 gt2[gt == -1] = 19 return gt2 def test(self, pkl=False): print("Testing mode will begin NOW..") # load the best model checkpoint to test on it if not pkl: self.load_best_model() # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) # naming = np.load(self.args.data_dir + 'names_train.npy') # init acc and loss lists acc_list = [] img_list = [] # idx of image idx = 0 # reset metrics self.metrics.reset() # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] y_batch = self.test_data['Y'][idx:idx + 1] if self.args.data_mode == 'test_v2': y_batch_large = self.test_data['Y_large'][idx:idx + 1] idx += 1 # Feed this variables to the network if self.args.random_cropping: feed_dict = {self.test_model.x_pl_before: x_batch, self.test_model.y_pl_before: y_batch, self.test_model.is_training: False, } else: feed_dict = {self.test_model.x_pl: x_batch, self.test_model.y_pl: y_batch, self.test_model.is_training: False } # run the feed_forward if self.args.data_mode == 'test_v2': out_argmax, acc = self.sess.run( [self.test_model.out_argmax, self.test_model.accuracy], feed_dict=feed_dict) else: out_argmax, acc, segmented_imgs = self.sess.run( [self.test_model.out_argmax, self.test_model.accuracy, # self.test_model.merged_summaries, self.test_model.segmented_summary], self.test_model.segmented_summary], feed_dict=feed_dict) if self.args.data_mode == 'test_v2': yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]), dtype=np.uint32) out_argmax = np.asarray(out_argmax, dtype=np.uint8) for y in range(out_argmax.shape[0]): yy[y, ...] 
= misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest') y_batch = y_batch_large out_argmax = yy if pkl: out_argmax[0] = self.linknet_postprocess(out_argmax[0]) segmented_imgs = decode_labels(out_argmax, 20) # print('mean preds ', out_argmax.mean()) # np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) if self.args.data_mode == 'test': plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) # log loss and acc acc_list += [acc] # log metrics if self.args.random_cropping: y1 = np.expand_dims(y_batch[0, :, :512], axis=0) y2 = np.expand_dims(y_batch[0, :, 512:], axis=0) y_batch = np.concatenate((y1, y2), axis=0) self.metrics.update_metrics(out_argmax, y_batch, 0, 0) else: self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) # mean over batches total_loss = 0 total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.test_data_len) # print in console tt.close() print("Here are the statistics") print("Total_loss: " + str(total_loss)) print("Total_acc: " + str(total_acc)[:6]) print("mean_iou: " + str(mean_iou)) print("Plotting imgs") for i in range(len(img_list)): plt.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i]) def test_eval(self, pkl=False): print("Testing mode will begin NOW..") # load the best model checkpoint to test on it if not pkl: self.load_best_model() # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) # idx of image idx = 0 # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] # Feed these variables to the network if self.args.random_cropping: feed_dict = {self.test_model.x_pl_before: x_batch, self.test_model.is_training: False, } else: feed_dict = {self.test_model.x_pl: x_batch, self.test_model.is_training: False } # run the feed_forward out_argmax, segmented_imgs = self.sess.run( [self.test_model.out_argmax, self.test_model.segmented_summary], feed_dict=feed_dict) if pkl: out_argmax[0] = self.linknet_postprocess(out_argmax[0]) segmented_imgs = decode_labels(out_argmax, 20) # Colored results for visualization colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx]) if not os.path.exists(os.path.dirname(colored_save_path)): os.makedirs(os.path.dirname(colored_save_path)) plt.imsave(colored_save_path, segmented_imgs[0]) # Results for official evaluation save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx]) if not os.path.exists(os.path.dirname(save_path)): os.makedirs(os.path.dirname(save_path)) output = postprocess(out_argmax[0]) misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest')) idx += 1 # print in console tt.close() def test_inference(self): """ Like the testing function, but this one calculates the inference time and measures frames per second """ print("INFERENCE mode will begin NOW..") # load the best model checkpoint to test on it self.load_best_model() # output_node: network/output/Argmax # input_node: network/input/Placeholder # for n in tf.get_default_graph().as_graph_def().node: # if 'input' in n.name:#if 'Argmax' in n.name: # import pdb; pdb.set_trace() print("Saving graph...") tf.train.write_graph(self.sess.graph_def, ".", 'graph.pb') print("Graph saved successfully.\n\n") # NOTE: this early exit skips the FPS measurement below; remove it to actually time inference exit(1) # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) # idx of image idx = 0 # create the FPS Meter fps_meter = FPSMeter() # loop by the number of iterations for 
cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] y_batch = self.test_data['Y'][idx:idx + 1] # update idx of mini_batch idx += 1 # Feed these variables to the network if self.args.random_cropping: feed_dict = {self.test_model.x_pl_before: x_batch, self.test_model.y_pl_before: y_batch # self.test_model.is_training: False, } else: feed_dict = {self.test_model.x_pl: x_batch, self.test_model.y_pl: y_batch # self.test_model.is_training: False } # calculate the time of one inference start = time.time() # run the feed_forward _ = self.sess.run( [self.test_model.out_argmax], feed_dict=feed_dict) # update the FPS meter fps_meter.update(time.time() - start) fps_meter.print_statistics() def finalize(self): self.reporter.finalize() self.summary_writer.close() self.save_model() def debug_layers(self): """ This function outputs the activations of all layers and dumps them in a pickle :return: """ print("Debugging mode will begin NOW..") layers = tf.get_collection('debug_layers') print("ALL layers in the debug collection to run: {}".format(len(layers))) for layer in layers: print(layer) # exit(0) # reset metrics self.metrics.reset() print('mean image ', self.debug_x.mean()) print('mean gt ', self.debug_y.mean()) self.debug_y = self.linknet_preprocess_gt(self.debug_y) feed_dict = {self.test_model.x_pl: self.debug_x, self.test_model.y_pl: self.debug_y, self.test_model.is_training: False } # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/weights"] # conv_w= self.sess.run(var[0]) # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/biases"] # bias= self.sess.run(var[0]) # run the feed_forward out_layers = self.sess.run(layers, feed_dict=feed_dict) for layer in out_layers: print(layer.shape) # dict_out= torchfile.load('out_networks_layers/dict_out.t7') ## init= tf.constant_initializer(conv_w) ## conv_w1 = tf.get_variable('my_weights', [3,3,128,128], tf.float32, initializer=init, trainable=True) # pp= tf.nn.relu(layers[39]) # out_relu= self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x, # self.test_model.y_pl: self.debug_y, # self.test_model.is_training: False # }) ## pp = tf.nn.conv2d_transpose(layers[39], conv_w, (1,32,64,128), strides=(1,2,2,1), padding="SAME") ## pp= tf.image.resize_images(layers[39], (32,64)) ## pp = tf.nn.conv2d(pp, conv_w, strides=(1,1,1,1), padding="SAME") ## bias1= tf.get_variable('my_bias', 128, tf.float32, tf.constant_initializer(bias)) # pp = tf.nn.bias_add(pp, bias) # #self.sess.run(conv_w1.initializer) # #self.sess.run(bias1.initializer) # out_deconv= self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x, # self.test_model.y_pl: self.debug_y, # self.test_model.is_training: False # }) # out_deconv_direct= self.sess.run(layers[40], feed_dict={self.test_model.x_pl: self.debug_x, # self.test_model.y_pl: self.debug_y, # self.test_model.is_training: False # }) # pdb.set_trace() # print(out_layers) # exit(0) # dump them in a pickle with open("out_networks_layers/out_linknet_layers.pkl", "wb") as f: pickle.dump(out_layers, f, protocol=2) # run the feed_forward again to see argmax and segmented out_argmax, segmented_imgs = self.sess.run( [self.test_model.out_argmax, self.test_model.segmented_summary], feed_dict=feed_dict) print('mean preds ', out_argmax[0].mean()) plt.imsave(self.args.out_dir + 'imgs/' + 'debug.png', segmented_imgs[0]) self.metrics.update_metrics(out_argmax[0], self.debug_y, 0, 0) mean_iou = 
self.metrics.compute_final_metrics(1) print("mean_iou_of_debug: " + str(mean_iou))
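# Editor's note: scipy.misc.imresize, used throughout the loaders and the v2/test
# paths above, was deprecated in SciPy 1.0 and removed in 1.3. A minimal stand-in
# sketch, assuming Pillow is available and the label maps fit in uint8 (the
# function name is illustrative, not part of the project):
import numpy as np
from PIL import Image

def imresize_nearest(label_map, out_shape):
    """Nearest-neighbour resize of a 2-D label map; out_shape is (height, width)."""
    img = Image.fromarray(np.asarray(label_map, dtype=np.uint8))
    # PIL's resize takes (width, height); NEAREST keeps class ids intact.
    return np.asarray(img.resize((out_shape[1], out_shape[0]), Image.NEAREST))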
def cycle_through_syncs(): my_report = Reporter() start_time = datetime.datetime.now() my_report.append_to_report('cycle started at ' + str(start_time)) # read configuration file for usernames and passwords and other parameters config = readDictFile('oli.config') # read the combinations of StudyEventOIDs and LimeSurvey sids event_survey_pairs = readDictFile('event_survey_pairs') # initialise the oc-webservice myWebService = studySubjectWS(config['userName'], config['password'], config['baseUrl']) myDataWS = dataWS(config['userName'], config['password'], config['baseUrl']) # create a connection to the postgresql database conn = ConnToOliDB() my_report.append_to_report(conn.init_result) while True: # retrieve all StudySubjectEvents, using the webservice allStudySubjectEvents = myWebService.getListStudySubjectEvents( config['studyIdentifier']) # now we have the StudySubjectIDs, run them against the postgresql table subjects # retrieve the subjects, using the connection to the postgresql database subjects_in_db = conn.ReadSubjectsFromDB() for studysubject_event in allStudySubjectEvents: # check if StudySubjectID is already in pg_database add_subject_to_db = True for subject_in_db in subjects_in_db: # check if this StudySubjectID is already there if (studysubject_event[0] == subject_in_db[1]): add_subject_to_db = False if (add_subject_to_db): myPgSubject = PGSubject(studysubject_event[0]) conn.AddSubjectsToDB([(myPgSubject.GetSSOID(), studysubject_event[0])]) my_report.append_to_report('added %s to database' % studysubject_event[0]) # now all StudySubjects in OpenClinica are also in our postgresql-database # so we refresh our list subjects_in_db = conn.ReadSubjectsFromDB() # collecting LimeSurvey data # Make a session, which is a bit of overhead, but the script will be running for hours. api = LimeSurveyRemoteControl2API(config['lsUrl']) session_req = api.sessions.get_session_key(config['lsUser'], config['lsPassword']) session_key = session_req.get('result') # initialise a new list for all tokens of all surveys # so we can check if a new token must be created all_tokens = [] for event_oid, sid in event_survey_pairs.items(): participants_req = api.tokens.list_participants(session_key, sid) participants = participants_req.get('result') for participant in participants: #loop through the participants, but only if there are any if participant != 'status': p_info = participant.get('participant_info') all_tokens.append((p_info.get('firstname'), event_oid, sid, participant.get('token'), participant.get('completed'))) for studysubject_event in allStudySubjectEvents: # check if we must check this event if studysubject_event[1] in event_survey_pairs: # yes, we must check this event blnAddTokens = True for one_token in all_tokens: if one_token[0] == studysubject_event[0] and one_token[1] == studysubject_event[1]: # a token exists blnAddTokens = False if blnAddTokens: #self._logger.debug("add token for " + studysubject_event[0] + ", " + studysubject_event[1]) print("add token for " + studysubject_event[0] + " " + str(event_survey_pairs[studysubject_event[1]]) + ", " + studysubject_event[1]) participant_data = {'firstname': studysubject_event[0]} add_participant_req = api.tokens.add_participants( session_key, event_survey_pairs[studysubject_event[1]], participant_data) my_report.append_to_report( 'created token for survey %s for subject %s' % (event_survey_pairs[studysubject_event[1]], studysubject_event[0])) # we may have added tokens, so refresh all_tokens # TODO: let's make this a method (see the fetch_all_tokens sketch after this function) all_tokens = [] for event_oid, sid in event_survey_pairs.items(): participants_req = api.tokens.list_participants(session_key, sid) participants = participants_req.get('result') for participant in participants: #loop through the participants, but only if there are any if participant != 'status': p_info = participant.get('participant_info') all_tokens.append((p_info.get('firstname'), event_oid, sid, participant.get('token'), participant.get('completed'))) # now import the LimeSurvey results into OpenClinica # sorted by study subject id sorted_tokens = sorted(all_tokens, key=itemgetter(0)) last_ssid = 'x' lime_survey_header = 'ev. 
token completed --------------------------- ' lime_survey_data_to_import = lime_survey_header for token in sorted_tokens: survey_friendly_name = conn.DLookup("friendly_name", "ls_sids", "ls_sid=%d" % (int(token[2]))) if last_ssid != token[0]: # new study subject ID, so write the previous one ssoid = conn.DLookup("study_subject_oid", "subjects", "study_subject_id='%s'" % (last_ssid)) # skip the start-value if last_ssid != 'x': ls_data_in_db = conn.DLookup( "ls_data", "subjects", "study_subject_oid='%s'" % (ssoid)) if lime_survey_data_to_import != ls_data_in_db: myImport = myDataWS.importLSData( ssoid, lime_survey_data_to_import) conn.WriteLSDataToDB(ssoid, lime_survey_data_to_import, myImport) my_report.append_to_report( 'wrote ls_data for subject %s 1' % (ssoid)) # reset the variables last_ssid = token[0] lime_survey_data_to_import = lime_survey_header + survey_friendly_name + ' ' + token[ 3] + ' ' + token[4] + ' ' else: lime_survey_data_to_import = lime_survey_data_to_import + survey_friendly_name + ' ' + token[ 3] + ' ' + token[4] + ' ' # print the last one ssoid = conn.DLookup("study_subject_oid", "subjects", "study_subject_id='%s'" % (last_ssid)) ls_data_in_db = conn.DLookup("ls_data", "subjects", "study_subject_oid='%s'" % (ssoid)) if lime_survey_data_to_import != ls_data_in_db: myImport = myDataWS.importLSData(ssoid, lime_survey_data_to_import) conn.WriteLSDataToDB(ssoid, lime_survey_data_to_import, myImport) my_report.append_to_report('wrote ls_data for subject %s 2' % (ssoid)) # some book keeping to check if we must continue looping, or break the loop # first sleep a bit, so we do not eat up all CPU time.sleep(int(config['sleep_this_long'])) current_time = datetime.datetime.now() difference = current_time - start_time loop_this_long = config['loop_this_long'] max_diff_list = loop_this_long.split(sep=':') max_difference = datetime.timedelta(hours=int(max_diff_list[0]), minutes=int(max_diff_list[1]), seconds=int(max_diff_list[2])) if difference > max_difference: break my_report.append_to_report('finished looping from %s till %s.' % (start_time, current_time)) # close the file so we can send it my_report.close_file() MailThisLogFile('logs/report.txt')
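# Sketch for the TODO in cycle_through_syncs() above: the token-refresh loop
# appears twice, so it could be factored into a helper. Assumes the same
# LimeSurveyRemoteControl2API instance, session key and event_survey_pairs dict
# as in the function; the name fetch_all_tokens is illustrative:
def fetch_all_tokens(api, session_key, event_survey_pairs):
    """Return (firstname, event_oid, sid, token, completed) tuples for all surveys."""
    all_tokens = []
    for event_oid, sid in event_survey_pairs.items():
        participants = api.tokens.list_participants(session_key, sid).get('result')
        for participant in participants:
            # LimeSurvey returns a {'status': ...} dict when a survey has no participants
            if participant != 'status':
                p_info = participant.get('participant_info')
                all_tokens.append((p_info.get('firstname'), event_oid, sid,
                                   participant.get('token'),
                                   participant.get('completed')))
    return all_tokens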
def __init__(self, args, sess, model): print("\nTraining is initializing itself\n") self.args = args self.sess = sess self.model = model # shortcut for model params self.params = self.model.params # To initialize all variables self.init = None self.init_model() # Create a saver object self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep, keep_checkpoint_every_n_hours=10, save_relative_paths=True) self.saver_best = tf.train.Saver(max_to_keep=1, save_relative_paths=True) # Load from latest checkpoint if found self.load_model() ################################################################################## # Init summaries # Summary variables self.scalar_summary_tags = [ 'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch' ] self.images_summary_tags = [ ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]) ] self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} # init summaries and its operators self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) ################################################################################## if self.args.mode == 'train': self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size else: self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_test_data() ################################################################################## # Init metrics class self.metrics = Metrics(self.args.num_classes) # Init reporter class if self.args.mode in ('train', 'overfit'): self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) elif self.args.mode == 'test': self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args)
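# Note on the iteration counts above: the tfrecord path uses floor division
# (len // batch_size), which drops a final partial batch, while the npy loaders
# elsewhere use the ceiling-division idiom. A quick illustration with
# hypothetical numbers:
train_len, batch_size = 1050, 100
print(train_len // batch_size)                     # 10 -> remainder of 50 dropped
print((train_len + batch_size - 1) // batch_size)  # 11 -> remainder kept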
#!/usr/bin/env python3 from utils.reporter import Reporter if __name__ == '__main__': reporter = Reporter() reporter.report_release_failure()
def setup(self, env, agent, max_training_episodes, max_test_episodes, max_steps_training, max_steps_testing, save_path=os.path.expanduser("~") + os.path.sep + "urnai_saved_trainings", file_name=str(datetime.now()).replace(" ", "_").replace( ":", "_").replace(".", "_"), enable_save=True, save_every=10, relative_path=False, debug_level=0, reset_epsilon=False, tensorboard_logging=False, log_actions=True, episode_batch_avg_calculation=10, do_reward_test=False, reward_test_number_of_episodes=10, rolling_avg_window_size=20): self.versioner = Versioner() self.env = env self.agent = agent self.save_path = save_path self.file_name = file_name self.enable_save = enable_save self.save_every = save_every self.relative_path = relative_path self.reset_epsilon = reset_epsilon self.max_training_episodes = max_training_episodes self.max_test_episodes = max_test_episodes self.max_steps_training = max_steps_training self.max_steps_testing = max_steps_testing self.curr_training_episodes = -1 self.curr_playing_episodes = -1 rp.VERBOSITY_LEVEL = debug_level self.tensorboard_logging = tensorboard_logging self.log_actions = log_actions self.episode_batch_avg_calculation = episode_batch_avg_calculation self.do_reward_test = do_reward_test self.reward_test_number_of_episodes = reward_test_number_of_episodes self.rolling_avg_window_size = rolling_avg_window_size self.inside_training_test_loggers = [] self.logger = Logger( 0, self.agent.__class__.__name__, self.agent.model.__class__.__name__, self.agent.model, self.agent.action_wrapper.__class__.__name__, self.agent.action_wrapper.get_action_space_dim(), self.agent.action_wrapper.get_named_actions(), self.agent.state_builder.__class__.__name__, self.agent.reward_builder.__class__.__name__, self.env.__class__.__name__, log_actions=self.log_actions, episode_batch_avg_calculation=self.episode_batch_avg_calculation, rolling_avg_window_size=self.rolling_avg_window_size) # Adding epsilon, learning rate and gamma factors to our pickle black list, # so that they are not loaded when loading the model's weights. # This makes the current training session act as a brand new training session # (except that the model's weights may already be somewhat optimized from previous trainings) if self.reset_epsilon: self.agent.model.pickle_black_list.append("epsilon_greedy") self.agent.model.pickle_black_list.append("epsilon_decay_rate") self.agent.model.pickle_black_list.append("epsilon_min") self.agent.model.pickle_black_list.append("gamma") self.agent.model.pickle_black_list.append("learning_rate") self.agent.model.pickle_black_list.append("learning_rate_min") self.agent.model.pickle_black_list.append("learning_rate_decay") self.agent.model.pickle_black_list.append( "learning_rate_decay_ep_cutoff") currentdir = os.path.dirname( os.path.abspath(inspect.getfile(inspect.currentframe()))) parentdir = os.path.dirname(currentdir) parentdir = os.path.dirname(parentdir) if (relative_path): self.full_save_path = parentdir + os.path.sep + self.save_path + os.path.sep + self.file_name else: self.full_save_path = self.save_path + os.path.sep + self.file_name self.full_save_play_path = self.full_save_path + os.path.sep + "play_files" if self.enable_save and os.path.exists(self.full_save_path): rp.report("WARNING! Loading training from " + self.full_save_path + " with SAVING ENABLED.") self.load(self.full_save_path) self.versioner.ask_for_continue() self.make_persistance_dirs(self.log_actions) elif self.enable_save: rp.report("WARNING! 
Starting new training on " + self.full_save_path + " with SAVING ENABLED.") self.make_persistance_dirs(self.log_actions) else: rp.report( "WARNING! Starting new training WITHOUT SAVING PROGRESS.") if (self.tensorboard_logging): logdir = self.full_save_path + "/tf_logs" self.agent.model.tensorboard_callback = [ tf.keras.callbacks.TensorBoard(log_dir=logdir) ]
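# The default file_name in setup() sanitizes a timestamp so it can serve as a
# directory name. A quick illustration of that transformation (the printed
# value is just an example):
from datetime import datetime

stamp = str(datetime.now()).replace(" ", "_").replace(":", "_").replace(".", "_")
print(stamp)  # e.g. 2023-01-31_14_05_59_123456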
def save_extra(self, save_path): self.env.save(save_path) self.agent.save(save_path) self.logger.save(save_path) self.versioner.save(save_path) rp.save(save_path)
def load_extra(self, save_path): self.agent.load(save_path) self.env.load(save_path) self.logger.load(save_path) self.versioner.load(save_path) rp.load(save_path)
#!/usr/bin/env python3 from utils.reporter import Reporter if __name__ == '__main__': reporter = Reporter() reporter.report_test_failure()
def cycle_through_syncs(): my_report = Reporter() start_time = datetime.datetime.now() my_report.append_to_report('INFO: cycle started at ' + str(start_time)) # read configuration file for usernames and passwords and other parameters config = readDictFile('oli.config') # set the sid from this config, because it is used everywhere sid = int(config['sid']) # create a connection to the postgresql database conn = ConnToOliDB() my_report.append_to_report(conn.init_result) # initialize the oc-webservice myDataWS = dataWS(config['userName'], config['password'], config['baseUrl']) # start the cycling here while True: # get the responses as a list responses_list = read_ls_responses(config) # process the responses one by one for one_response in responses_list: #print(one_response) # get the response_id, for administrative purposes response_id = one_response['id'] # check if this combination sid-response-id already exists and if not, add it conn.TryToAddSubjectToDB(sid, response_id) # now see if we can do something with the data: start with the child code # reset study_subject_id and study_subject_oid study_subject_id = None study_subject_oid = None if (one_response['ChildCode'] is None): # write this to error report my_report.append_to_report('ERROR: Missing ChildCode for resp.id. %i' % response_id) else: # add leading zeros and the study prefix study_subject_id = config['childcode_prefix'] + ('0000' + str(int(float(one_response['ChildCode']))))[-8:] if (len(study_subject_id) != 13): # write this to error report my_report.append_to_report('ERROR: Incorrect ChildCode for resp.id. %i: %i' % (response_id, int(float(one_response['ChildCode'])))) else: # write the child-code / study subject id to the database if (conn.DLookup('study_subject_id', 'ls_responses', 'sid=%i and response_id=%i' % (sid, response_id)) is None): conn.WriteStudySubjectID(sid, response_id, study_subject_id) # check if we already have a valid study subject oid study_subject_oid = conn.DLookup('study_subject_oid', 'ls_responses', 'sid=%i and response_id=%i' % (sid, response_id)) if (study_subject_oid is None or study_subject_oid == 'None'): # try to get a valid study subject oid study_subject_oid = PGSubject(study_subject_id).GetSSOID() # we don't know if we now have a study_subject_oid, # but the procedure only writes the study subject oid to the database for later use # if it is not null conn.WriteStudySubjectOID(sid, response_id, study_subject_oid) # only continue if we have both study subject id and study subject oid if (study_subject_oid is None): # write this to error report my_report.append_to_report('ERROR: missing OID for resp.id. %i : ChildCode %s' % (response_id, study_subject_id)) else: # only compose the odm and try to import the result # if this wasn't done before, so look at date_completed if (conn.DLookup('date_completed', 'ls_responses', 'sid=%i and response_id=%i' % (sid, response_id)) is None): #print(one_response) print('resp.id. %i' % response_id) # we try to compose the request, but if we can't convert an item to the correct data type # then we put that in the report ws_request = compose_odm(study_subject_oid, one_response) if (ws_request.find('CONVERSION-ERROR') != -1): #print(ws_request) item_starts_at = ws_request.find('CONVERSION-ERROR') my_report.append_to_report('ERROR: conversion for resp.id. 
%i %s failed with message "%s" and more' % (response_id, study_subject_id, ws_request[item_starts_at:item_starts_at + 100])) else: #print(ws_request) conn.WriteDataWSRequest(sid, response_id, ws_request) import_result = myDataWS.importData(ws_request) #print(import_result) import_result = import_result.replace("'", "") conn.WriteDataWSResponse(sid, response_id, import_result) if (import_result.find('Success') == 0): my_report.append_to_report('INFO: Successfully imported data for %s (%s)' % (study_subject_id, study_subject_oid)) conn.SetResponseComplete(sid, response_id) else: item_starts_at = import_result.find('I_') my_report.append_to_report('ERROR: import for resp.id %i %s failed with message "%s" and more' % (response_id, study_subject_id, import_result[item_starts_at:])) # move on with the next response # check if we must continue looping, or break the loop # first sleep a bit, so we do not eat up all CPU time.sleep(int(config['sleep_this_long'])) current_time = datetime.datetime.now() difference = current_time - start_time loop_this_long = config['loop_this_long'] max_diff_list = loop_this_long.split(sep=':') max_difference = datetime.timedelta(hours=int(max_diff_list[0]), minutes=int(max_diff_list[1]), seconds=int(max_diff_list[2])) if difference > max_difference: break my_report.append_to_report('INFO: finished looping from %s till %s.' % (start_time, current_time)) # close the file so we can send it my_report.close_file() MailThisLogFile('logs/report.txt')
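# Both sync scripts above parse config['loop_this_long'] ('HH:MM:SS') inline
# before comparing it with the elapsed time. A small helper could keep that in
# one place; the name parse_loop_duration is illustrative:
import datetime

def parse_loop_duration(loop_this_long):
    """Parse an 'HH:MM:SS' string into a datetime.timedelta."""
    hours, minutes, seconds = (int(part) for part in loop_this_long.split(':'))
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)

# e.g. parse_loop_duration('08:00:00') == datetime.timedelta(hours=8)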
class NewTrain(object): def __init__(self, args, sess, model): print("\nTraining is initializing itself\n") self.args = args self.sess = sess self.model = model # shortcut for model params self.params = self.model.params # To initialize all variables self.init = None self.init_model() # Create a saver object self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep, keep_checkpoint_every_n_hours=10, save_relative_paths=True) self.saver_best = tf.train.Saver(max_to_keep=1, save_relative_paths=True) # Load from latest checkpoint if found self.load_model() ################################################################################## # Init summaries # Summary variables self.scalar_summary_tags = [ 'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch' ] self.images_summary_tags = [ ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]) ] self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} # init summaries and its operators self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) ################################################################################## if self.args.mode == 'train': self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size else: self.test_data = None self.test_data_len = None self.num_iterations_testing_per_epoch = None self.load_test_data() ################################################################################## # Init metrics class self.metrics = Metrics(self.args.num_classes) # Init reporter class if self.args.mode in ('train', 'overfit'): self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) elif self.args.mode == 'test': self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) ################################################################################## @timeit def load_test_data(self): print("Loading Testing data..") self.test_data = { 'X': np.load(self.args.data_dir + "X_val.npy"), 'Y': np.load(self.args.data_dir + "Y_val.npy") } self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size print("Test-shape-x -- " + str(self.test_data['X'].shape)) print("Test-shape-y -- " + str(self.test_data['Y'].shape)) self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size print("Test data is loaded") @timeit def init_model(self): print("Initializing the variables of the model") self.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) self.sess.run(self.init) print("Initialization finished") def save_model(self): """ Save Model Checkpoint :return: """ print("saving a checkpoint") self.saver.save(self.sess, self.args.checkpoint_dir, self.model.global_step_tensor) print("Saved a checkpoint") def save_best_model(self): """ Save BEST Model Checkpoint :return: """ print("saving a checkpoint for the best model") self.saver_best.save(self.sess, self.args.checkpoint_best_dir, self.model.global_step_tensor) print("Saved a checkpoint for the best model") def load_best_model(self): """ Load the best model checkpoint :return: """ print("loading the BEST model checkpoint") latest_checkpoint = 
tf.train.latest_checkpoint( self.args.checkpoint_best_dir) if latest_checkpoint: print( "Loading model checkpoint {} ...\n".format(latest_checkpoint)) self.saver_best.restore(self.sess, latest_checkpoint) else: print("ERROR NO best checkpoint found") exit(-1) print("BEST MODEL LOADED..") def init_summaries(self): """ Create the summary part of the graph :return: """ with tf.variable_scope('train-summary-per-epoch'): for tag in self.scalar_summary_tags: self.summary_tags += tag self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) self.summary_ops[tag] = tf.summary.scalar( tag, self.summary_placeholders[tag]) for tag, shape in self.images_summary_tags: self.summary_tags += tag self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) self.summary_ops[tag] = tf.summary.image( tag, self.summary_placeholders[tag], max_outputs=10) def add_summary(self, step, summaries_dict=None, summaries_merged=None): """ Add the summaries to tensorboard :param step: :param summaries_dict: :param summaries_merged: :return: """ if summaries_dict is not None: summary_list = self.sess.run( [self.summary_ops[tag] for tag in summaries_dict.keys()], { self.summary_placeholders[tag]: value for tag, value in summaries_dict.items() }) for summary in summary_list: self.summary_writer.add_summary(summary, step) if summaries_merged is not None: self.summary_writer.add_summary(summaries_merged, step) @timeit def load_model(self): """ Load the latest checkpoint :return: """ try: # This is for loading the pretrained weights if they can't be loaded during initialization. self.model.encoder.load_pretrained_weights(self.sess) except AttributeError: pass print("Searching for a checkpoint") latest_checkpoint = tf.train.latest_checkpoint( self.args.checkpoint_dir) if latest_checkpoint: print( "Loading model checkpoint {} ...\n".format(latest_checkpoint)) self.saver.restore(self.sess, latest_checkpoint) print("Model loaded from the latest checkpoint\n") else: print("\n.. 
No ckpt, SO First time to train :D ..\n") def train(self): print("Training mode will begin NOW ..") tf.train.start_queue_runners(sess=self.sess) curr_lr = self.model.args.learning_rate for cur_epoch in range( self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1): # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_training_per_epoch), total=self.num_iterations_training_per_epoch, desc="epoch-" + str(cur_epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] # loop by the number of iterations for cur_iteration in tt: # get the cur_it for the summary cur_it = self.model.global_step_tensor.eval(self.sess) # Feed this variables to the network feed_dict = { self.model.handle: self.model.training_handle, self.model.is_training: True, self.model.curr_learning_rate: curr_lr } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_training_per_epoch - 1: # run the feed_forward _, loss, acc, summaries_merged = self.sess.run( [ self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] # summarize self.add_summary(cur_it, summaries_merged=summaries_merged) else: # run the feed_forward _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( [ self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary ], feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) # summarize summaries_dict = dict() summaries_dict['train-loss-per-epoch'] = total_loss summaries_dict['train-acc-per-epoch'] = total_acc summaries_dict['train_prediction_sample'] = segmented_imgs self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics( 'train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) self.reporter.finalize() # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # Update the Cur Epoch tensor # it is the last thing because if it is interrupted it repeat this self.model.global_epoch_assign_op.eval( session=self.sess, feed_dict={ self.model.global_epoch_input: cur_epoch + 1 }) # print in console tt.close() print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[:6]) # Break the loop to finalize this epoch break # Update the Global step self.model.global_step_assign_op.eval( session=self.sess, feed_dict={self.model.global_step_input: cur_it + 1}) # Save the current checkpoint if cur_epoch % self.args.save_every == 0: self.save_model() # Test the model on validation if cur_epoch % self.args.test_every == 0: self.test_per_epoch( step=self.model.global_step_tensor.eval(self.sess), epoch=self.model.global_epoch_tensor.eval(self.sess)) if cur_epoch % self.args.learning_decay_every == 0: curr_lr = curr_lr * self.args.learning_decay print('Current learning rate is ', curr_lr) print("Training Finished") def test_per_epoch(self, step, epoch): print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..") # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_validation_per_epoch), 
total=self.num_iterations_validation_per_epoch, desc="Val-epoch-" + str(epoch) + "-") # init acc and loss lists loss_list = [] acc_list = [] inf_list = [] # reset metrics self.metrics.reset() # get the maximum iou to compare with and save the best model max_iou = self.model.best_iou_tensor.eval(self.sess) # init dataset to validation self.sess.run(self.model.validation_iterator.initializer) # loop by the number of iterations for cur_iteration in tt: # Feed this variables to the network feed_dict = { self.model.handle: self.model.validation_handle, self.model.is_training: False } # Run the feed forward but the last iteration finalize what you want to do if cur_iteration < self.num_iterations_validation_per_epoch - 1: start = time.time() # run the feed_forward next_img, out_argmax, loss, acc = self.sess.run( [ self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy ], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, next_img[1]) else: start = time.time() # run the feed_forward next_img, out_argmax, loss, acc, segmented_imgs = self.sess.run( [ self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.segmented_summary ], feed_dict=feed_dict) end = time.time() # log loss and acc loss_list += [loss] acc_list += [acc] inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, next_img[1]) # mean over batches total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics( self.num_iterations_validation_per_epoch) mean_iou_arr = self.metrics.iou mean_inference = str(np.mean(inf_list)) + '-seconds' # summarize summaries_dict = dict() summaries_dict['val-loss-per-epoch'] = total_loss summaries_dict['val-acc-per-epoch'] = total_acc summaries_dict['mean_iou_on_val'] = mean_iou summaries_dict['val_prediction_sample'] = segmented_imgs self.add_summary(step, summaries_dict=summaries_dict) self.summary_writer.flush() # report self.reporter.report_experiment_statistics( 'validation-acc', 'epoch-' + str(epoch), str(total_acc)) self.reporter.report_experiment_statistics( 'validation-loss', 'epoch-' + str(epoch), str(total_loss)) self.reporter.report_experiment_statistics( 'avg_inference_time_on_validation', 'epoch-' + str(epoch), str(mean_inference)) self.reporter.report_experiment_validation_iou( 'epoch-' + str(epoch), str(mean_iou), mean_iou_arr) self.reporter.finalize() # print in console tt.close() print("Val-epoch-" + str(epoch) + "-" + "loss:" + str(total_loss) + "-" + "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) print("Last_max_iou: " + str(max_iou)) if mean_iou > max_iou: print( "This validation got a new best iou. 
so we will save this one" ) # save the best model self.save_best_model() # Set the new maximum self.model.best_iou_assign_op.eval( session=self.sess, feed_dict={self.model.best_iou_input: mean_iou}) else: print("hmm not the best validation epoch :/..") # Break the loop to finalize this epoch break def test(self): print("Testing mode will begin NOW..") # load the best model checkpoint to test on it self.load_best_model() # init tqdm and get the epoch value tt = tqdm(range(self.test_data_len)) naming = np.load(self.args.data_dir + 'names_train.npy') # init acc and loss lists loss_list = [] acc_list = [] img_list = [] # idx of image idx = 0 # reset metrics self.metrics.reset() # loop by the number of iterations for cur_iteration in tt: # load mini_batches x_batch = self.test_data['X'][idx:idx + 1] y_batch = self.test_data['Y'][idx:idx + 1] # update idx of mini_batch idx += 1 # Feed these variables to the network feed_dict = { self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } # run the feed_forward out_argmax, loss, acc, summaries_merged, segmented_imgs = self.sess.run( [ self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary ], feed_dict=feed_dict) np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) plt.imsave( self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) # log loss and acc loss_list += [loss] acc_list += [acc] # log metrics self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) # mean over batches total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.test_data_len) # print in console tt.close() print("Here are the statistics") print("Total_loss: " + str(total_loss)) print("Total_acc: " + str(total_acc)[:6]) print("mean_iou: " + str(mean_iou)) print("Plotting imgs") def finalize(self): self.reporter.finalize() self.summary_writer.close() self.save_model()
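# @timeit decorates the loaders above but is defined elsewhere in the project;
# a minimal sketch of such a decorator, assuming it only reports wall-clock time:
import time
from functools import wraps

def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        print("{} took {:.3f}s".format(func.__name__, time.time() - start))
        return result
    return wrapper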