Пример #1
0
    def log_training_start_information(self):
        """Compose the training-start summary, store it and report it.

        The summary lists the agent, action wrapper, state builder, reward
        builder, environment and model names, followed by a description of
        the model:

        * when the model has a ``lib`` attribute and a ``neural_net_class``,
          the Keras ``summary()`` output or the string form of the PyTorch
          network, depending on ``model.lib``;
        * otherwise one line per entry of ``model.build_model``.

        The text is appended to ``self.training_report`` and passed to
        ``rp.report``.
        """
        text = ("\n   Agent: {}\n".format(self.agent_name) +
                "   ActionWrapper: {}\n".format(self.action_wrapper_name) +
                "   StateBuilder: {}\n".format(self.state_builder_name) +
                "   RewardBuilder: {}\n".format(self.reward_builder_name) +
                "   Environment: {}\n".format(self.env_name) +
                "   Model: {}\n".format(self.model_name))

        model = self.model
        # The short-circuit keeps neural_net_class from being read on models
        # that do not expose a library at all.
        if hasattr(model, "lib") and model.neural_net_class is not None:
            if model.lib == constants.Libraries.KERAS:
                # Collect the summary lines instead of letting them be printed.
                summary_lines = []
                model.dnn.model.summary(print_fn=summary_lines.append)
                text += "       " + "\n".join(summary_lines)
            if model.lib == constants.Libraries.PYTORCH:
                # The network object is not a str; concatenating it directly
                # raises TypeError, so its string form is used.
                text += "       " + str(model.dnn.model)
        else:
            for idx, layer in enumerate(model.build_model):
                text += "       Layer {}: {}\n".format(idx, layer)

        self.training_report += text

        rp.report(text)
Пример #2
0
 def save(self, savepath):
     '''
     Persist this object under savepath: report the save,
     then write the pickle data (save_pickle) followed by
     the extra data (save_extra).
     '''
     class_name = self.__class__.__name__
     message = "Saving {} object...".format(class_name)
     rp.report(message, verbosity_lvl=1)

     self.save_pickle(savepath)
     self.save_extra(savepath)
Пример #3
0
    def ask_for_continue(self):
        """Ask whether to continue when the loaded version is not current.

        Nothing happens when ``self.version`` equals the current version.
        Otherwise the user is prompted through ``rp.input`` until the answer
        is "y" or "n" (case-insensitive). On "n" the stop is reported and the
        process exits.

        :raises SystemExit: when the user answers "n".
        """
        # Local import: exit() is an interactive helper installed by the
        # site module and is not guaranteed to exist; sys.exit() always is.
        import sys

        if self.version == self.__curr_version:
            return

        answer = ""
        while answer.lower() not in ("y", "n"):
            # NOTE(review): the second argument "n" is presumably the default
            # answer used by rp.input - confirm against its definition.
            answer = rp.input("The loaded training version is {} and the current version is {}. This difference can cause some kind of error while proceeding to the training, do you wish to continue? [y/n]".format(self.version, self.__curr_version), "n")

            if answer.lower() == "n":
                rp.report("The training was stopped.")
                sys.exit()
Пример #4
0
    def get_sc2_reward(self, obs):
        build_supply_depot = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_SUPPLY_DEPOT
        build_barrack = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_BARRACK
        build_marine = BuildUnitsGeneralizedRewardBuilder.ACTION_BUILD_MARINE
        do_nothing = BuildUnitsGeneralizedRewardBuilder.ACTION_DO_NOTHING

        current = self.get_sc2_number_of_supply_depot(obs)
        prev = self.get_sc2_number_of_supply_depot(self.previous_state)
        supply_depot_amount_diff = (current - prev)

        current = self.get_sc2_number_of_barracks(obs)
        prev = self.get_sc2_number_of_barracks(self.previous_state)
        barracks_amount_diff = (current - prev)

        current = self.get_sc2_number_of_marines(obs)
        prev = self.get_sc2_number_of_marines(self.previous_state)
        marines_amount_diff = (current - prev)

        negative_rwd = 0
        chosen_action = BuildUnitsGeneralizedRewardBuilder.LAST_CHOSEN_ACTION
        if chosen_action > -1:
            supply_depot_amount = self.get_sc2_number_of_supply_depot(obs)
            barracks_amount = self.get_sc2_number_of_barracks(obs)
            minerals = obs.player.minerals
            if chosen_action == build_supply_depot:
                if supply_depot_amount > 7 or minerals < 100:
                    negative_rwd = -10
            elif chosen_action == build_barrack:
                if supply_depot_amount <= 0 or minerals < 150:
                    negative_rwd = -10
            elif chosen_action == build_marine:
                if barracks_amount <= 0 or minerals < 50:
                    negative_rwd = -10
            #elif chosen_action == do_nothing:
            #    negative_rwd = -1

        #rwd = negative_rwd + rwdB + rwdC
        rp.report('''
Calculated reward is: {},
composed of:
supply_depot_amount: {},
barracks_amount: {},
marines_amount: {},
negative_rdw: {}
                '''.format(
            negative_rwd + supply_depot_amount_diff +
            barracks_amount_diff * 10 + marines_amount_diff * 100,
            supply_depot_amount_diff, barracks_amount_diff * 10,
            marines_amount_diff * 100, negative_rwd),
                  verbosity_lvl=1)
        if supply_depot_amount_diff < 0 or barracks_amount_diff < 0 or marines_amount_diff < 0:
            return 0
        else:
            rwd = negative_rwd + supply_depot_amount_diff + barracks_amount_diff * 10 + marines_amount_diff * 100
            return rwd
Пример #5
0
    def __init__(self):
        """Read the "ES" and "indices_module" settings and build the helpers.

        The configuration is read from ``CONFIG_PATH``. ``sampling_speed`` is
        converted to an int; the other options are stored as read.
        """
        parser = ConfigParser.ConfigParser()
        parser.read(CONFIG_PATH)

        es_section = "ES"
        module_section = "indices_module"

        # Settings from the "ES" section
        self.es_url = parser.get(es_section, "es_url")
        self.esindex_prefix = parser.get(es_section, "esindex_prefix")

        # Settings specific to the indices module
        self.data_type = parser.get(module_section, "data_type")
        self.sampling_speed = parser.getint(module_section, "sampling_speed")
        self.store_size_unit = parser.get(module_section, "store_size_unit")

        self.indices_parser = IndicesParser()
        self.reporter = Reporter()
Пример #6
0
    def __init__(self):
        """Read the "ES" and "nodes_module" settings and build the helpers.

        The configuration is read from ``CONFIG_PATH``. ``sampling_speed`` is
        converted to an int; every other option, including
        ``nodes_total_count``, is stored exactly as returned by the parser.
        """
        parser = ConfigParser.ConfigParser()
        parser.read(CONFIG_PATH)

        es_section = "ES"
        module_section = "nodes_module"

        # Settings from the "ES" section
        self.es_url = parser.get(es_section, "es_url")
        self.esindex_prefix = parser.get(es_section, "esindex_prefix")

        # Settings specific to the nodes module
        self.data_type = parser.get(module_section, "data_type")
        self.sampling_speed = parser.getint(module_section, "sampling_speed")
        self.data_structure = parser.get(module_section, "data_structure")
        self.nodes_total_count = parser.get(module_section,
                                            "nodes_total_count")

        self.nodes_parser = NodesParser()
        self.es_template = EsTemplate()
        self.reporter = Reporter()
Пример #7
0
    def __init__(self, args, sess, model):
        """
        Call the constructor of the base class, load the test data that
        matches ``args.data_mode``, select the ``run`` entry point that
        matches ``args.task`` and create the metrics and reporter helpers.

        An unsupported ``args.data_mode`` loads nothing. An unsupported
        ``args.task`` prints an error and terminates the process.

        :param args: configuration object; ``data_mode``, ``task``,
            ``num_classes`` and ``out_dir`` are read here
        :param sess: forwarded to the base class constructor
        :param model: forwarded to the base class constructor
        :raises SystemExit: when ``args.task`` is not a supported task
        :return:
        """
        # Local import: exit() is an interactive helper installed by the
        # site module; sys.exit() is the programmatic equivalent.
        import sys

        super().__init__(args, sess, model)
        # Init load data and generator
        self.generator = None
        self.run = None

        # Load data: every supported data_mode resets the same three fields
        # and then calls its own loader. Loaders are looked up by name so
        # that only the selected one is resolved.
        loader_names = {
            "realsense": "load_realsence_data",
            "cityscapes_val": "load_val_data",
            "cityscapes_test": "load_test_data",
            "video": "load_vid_data",
        }
        loader_name = loader_names.get(self.args.data_mode)
        if loader_name is not None:
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            getattr(self, loader_name)()

        if self.args.task == "test":
            self.run = self.test
        elif self.args.task == "realsense":
            self.run = self.realsense_inference
        elif self.args.task == "realsense_imgs":
            self.run = self.realsense_imgs
        else:
            # This branch rejects the task, not the data_mode, so the
            # message names the option that is actually wrong.
            print("ERROR Please select a proper task BYE")
            sys.exit(-1)

        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class
        self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args)
def cycle_through_syncs():
    """Run one cycle: read the config, connect, list tokens, mail the log.

    The report records the start time and the result of connecting to the
    database. Tokens are read in two calls to ``read_ls_tokens``, collected
    into a dict mapping each token to the participant's first name, and
    printed. Finally the report file is closed and 'logs/report.txt' is
    mailed.
    """
    my_report = Reporter()

    started_at = datetime.datetime.now()
    my_report.append_to_report('INFO: cycle started at ' + str(started_at))

    # usernames, passwords and other parameters come from the config file
    config = readDictFile('oli.config')
    # the survey id, sid, is taken from the config because it is used
    # everywhere
    sid = int(config['sid'])

    # connection to the postgresql database
    conn = ConnToOliDB()
    my_report.append_to_report(conn.init_result)

    # the oc-webservice
    data_ws = dataWS(config['userName'], config['password'],
                     config['baseUrl'])

    # NOTE(review): the arguments after config are presumably a start index
    # and a page size - confirm against read_ls_tokens.
    tokens = {}
    for first_token in (0, 10):
        for entry in read_ls_tokens(config, first_token, 10):
            tokens[entry['token']] = entry['participant_info']['firstname']
    print(tokens)

    # the file must be closed before it can be sent
    my_report.close_file()
    MailThisLogFile('logs/report.txt')
Пример #9
0
 def load_pickle(self, persist_path):
     '''
     Restore the pickleable attributes of this object from
     its pickle file. Nothing is done when the file does not
     exist or is empty.
     '''
     pickle_path = self.get_full_persistance_pickle_path(persist_path)

     #Nothing to restore from a missing or empty file
     if not os.path.isfile(pickle_path):
         return
     if os.path.getsize(pickle_path) <= 0:
         return

     with open(pickle_path, "rb") as pickle_file:
         saved_attributes = pickle.load(pickle_file)
         self.restore_pickleable_attributes(saved_attributes)
         stamp = self.get_default_save_stamp()
         rp.report("**************************************** \n Pickle for " + stamp + " loaded. \n****************************************", 1)
Пример #10
0
    def log_train_stats(self):
        """Report the aggregate statistics of the episodes recorded so far.

        With at least one recorded episode, the average reward, win rate
        (percent), average number of steps and the seconds elapsed since
        ``self.training_start`` are formatted into one line, appended to
        ``self.training_report`` and reported. Otherwise only a notice that
        no episodes were recorded is reported.
        """
        if self.ep_count <= 0:
            rp.report("There are no recorded episodes!")
            return

        reward_avg = sum(self.ep_rewards) / self.ep_count
        win_rate = (sum(self.ep_victories) / self.ep_count) * 100
        steps_avg = sum(self.ep_avg_steps) / self.ep_count
        elapsed = round(time() - self.training_start, 2)

        pieces = [
            "\n",
            "Current Reward Avg.: {}".format(reward_avg),
            " Win rate: {:10.3f}%".format(win_rate),
            " Avg number of steps: {}".format(steps_avg),
            " Training Duration (seconds): {}".format(elapsed),
            "\n",
        ]
        text = "".join(pieces)

        self.training_report += text

        rp.report(text)
Пример #11
0
    def step(self, action):
        """Advance the wrapped environment by one step.

        For the DeepRTS game the action is stored in
        ``BuildUnitsGeneralizedRewardBuilder.LAST_CHOSEN_ACTION``, the map
        and army are set up on step 0 and gold is collected on step 1. The
        scripted actions (do nothing, build farm, build barrack, build
        footman) run their helper, if any, and then step the environment
        with action 15; any other action is passed to the environment
        unchanged. For StarCraft II the action is passed straight through.

        :param action: action chosen by the agent
        :return: ``(state, reward, done)`` for DeepRTS, whatever
            ``self.env.step`` returns for StarCraft II, and None for any
            other game.
        """
        scenario = GeneralizedBuildUnitsScenario

        if self.game == scenario.GAME_DEEP_RTS:
            BuildUnitsGeneralizedRewardBuilder.LAST_CHOSEN_ACTION = action
            if self.steps == 0:
                self.setup_map()
                self.spawn_army()
            elif self.steps == 1:
                self.collect_gold()

            if rp.VERBOSITY_LEVEL > 0:
                gold = self.env.players[0].gold
                barracks = self.get_drts_unit_type_count(0, self.env.constants.Unit.Barracks)
                farms = self.get_drts_unit_type_count(0, self.env.constants.Unit.Farm)
                footmen = self.get_drts_unit_type_count(0, self.env.constants.Unit.Footman)
                str_ = '''  DRTS Episode Status:
                 Number of gold = {},
                 Number of barracks = {},
                 Number of farms = {},
                 Number of soldiers = {}'''.format(gold, barracks, farms, footmen)
                rp.report(str_, verbosity_lvl=1)

            # Scripted actions do their work through a helper and then step
            # the environment with the "no action" code.
            no_action = 15
            env_action = no_action
            if action == scenario.ACTION_DRTS_DO_NOTHING:
                pass
            elif action == scenario.ACTION_DRTS_BUILD_FARM:
                self.build_farm()
            elif action == scenario.ACTION_DRTS_BUILD_BARRACK:
                self.build_barrack()
            elif action == scenario.ACTION_DRTS_BUILD_FOOTMAN:
                self.build_footman()
            else:
                env_action = action

            state, reward, done = self.env.step(env_action)
            self.steps += 1
            return state, reward, done

        if self.game == scenario.GAME_STARCRAFT_II:
            self.steps += 1
            return self.env.step(action)
Пример #12
0
    def log_ep_stats(self):
        """Report a one-line summary of the most recently recorded episode.

        The line contains the last entry of each per-episode series
        (outcome, rewards, steps, duration, steps per second), the best
        reward so far and the latest value of every ``self.agent_info``
        entry. When no episode has been recorded, only a notice is reported.
        """
        if self.ep_count <= 0:
            rp.report("There are no recorded episodes!")
            return

        # Latest value of every agent-info series
        latest_agent_info = {
            key: self.agent_info[key][-1] for key in self.agent_info
        }

        template = "Episode: {}/{} | Outcome: {} | Episode Avg. Reward: {:10.6f} | Episode Reward: {:10.6f} | Episode Steps: {:10.6f} | Best Reward was {} on episode: {} | Episode Duration (seconds): {} | Episode SPS: {} | SPS AVG: {} | Agent info: {}"
        values = (
            self.ep_count,
            self.ep_total,
            self.ep_victories[-1],
            self.ep_avg_rewards[-1],
            self.ep_rewards[-1],
            self.ep_steps_count[-1],
            self.best_reward,
            self.best_reward_episode,
            self.episode_duration_list[-1],
            self.episode_sps_list[-1],
            self.avg_sps_list[-1],
            latest_agent_info,
        )
        rp.report(template.format(*values))
Пример #13
0
    def test_agent(self):
        """Play test episodes with the current agent and record the result.

        Saving is disabled and the test-episode settings are overridden for
        the duration of the run, then ``self.play()`` is called. Afterwards
        the last average reward of the logger in place after the run is
        read, that logger is kept in ``self.inside_training_test_loggers``
        (marked as not yet saved) and the overridden attributes, including
        ``self.logger``, are restored. The average reward is finally
        appended to the restored logger's ``inside_training_test_avg_rwds``.

        The attributes are restored even when the test run raises, so a
        failed or interrupted test does not leave the trainer with saving
        disabled or with the test logger installed.
        """
        # Back up the attributes that the test run overwrites
        max_test_episodes_backup = self.max_test_episodes
        curr_playing_episodes_backup = self.curr_playing_episodes
        logger_backup = self.logger
        enable_save_backup = self.enable_save

        try:
            # Set attributes to test the agent
            self.enable_save = False
            self.max_test_episodes = self.reward_test_number_of_episodes
            self.curr_playing_episodes = 0

            rp.report("> Starting to check current agent performance.")
            # Make the agent play
            self.play()
            rp.report("> Finished checking current agent performance.")

            # Average reward of the test run
            rwd_avg = self.logger.ep_avg_rewards[-1]
            # Keep the logger of this run so it can be saved later; it holds
            # the more detailed information about the test
            self.inside_training_test_loggers.append({
                "logger": self.logger,
                "saved": False,
            })
        finally:
            # Restore backup
            self.max_test_episodes = max_test_episodes_backup
            self.curr_playing_episodes = curr_playing_episodes_backup
            self.logger = logger_backup
            self.enable_save = enable_save_backup

        # Register the reward average on the restored logger
        self.logger.inside_training_test_avg_rwds.append(rwd_avg)
Пример #14
0
    def training_loop(self, is_testing, reward_from_agent=True):
        """Run episodes until the configured episode limit is reached.

        In training mode the agent learns after every step, is evaluated
        with ``test_agent`` every ``episode_batch_avg_calculation`` episodes
        when ``do_reward_test`` is set, and is saved every ``save_every``
        episodes when saving is enabled. In testing mode the agent only
        plays. Statistics are recorded in ``self.logger``; when saving is
        enabled everything is saved once more after the last episode.

        :param is_testing: True to play test episodes without learning,
            False to train.
        :param reward_from_agent: when True the step reward is obtained from
            ``self.agent.get_reward``; otherwise the reward returned by the
            environment is used as is.
        """
        start_time = time.time()

        if is_testing:
            rp.report("\n\n> Playing")
            max_episodes = self.max_test_episodes
            max_steps = self.max_steps_testing
            current_episodes = self.curr_playing_episodes
        else:
            rp.report("> Training")
            max_episodes = self.max_training_episodes
            max_steps = self.max_steps_training
            current_episodes = self.curr_training_episodes

        # A new logger is created for every test run, and for a training run
        # whose current logger has no recorded episodes.
        if self.logger.ep_count == 0 or is_testing:
            self.logger = Logger(
                max_episodes,
                self.agent.__class__.__name__,
                self.agent.model.__class__.__name__,
                self.agent.model,
                self.agent.action_wrapper.__class__.__name__,
                self.agent.action_wrapper.get_action_space_dim(),
                self.agent.action_wrapper.get_named_actions(),
                self.agent.state_builder.__class__.__name__,
                self.agent.reward_builder.__class__.__name__,
                self.env.__class__.__name__,
                log_actions=self.log_actions,
                episode_batch_avg_calculation=self.
                episode_batch_avg_calculation,
                rolling_avg_window_size=self.rolling_avg_window_size)

        while current_episodes < max_episodes:
            current_episodes += 1
            self.env.start()

            if is_testing:
                self.curr_playing_episodes = current_episodes
            else:
                self.curr_training_episodes = current_episodes

            # Reset the environment
            obs = self.env.reset()
            step_reward = 0
            done = False
            # Passing the episode to the agent reset, so that it can be passed to model reset
            # Allowing the model to track the episode number, and decide if it should diminish the
            # Learning Rate, depending on the currently selected strategy.
            self.agent.reset(current_episodes)

            ep_reward = 0
            victory = False

            ep_actions = np.zeros(
                self.agent.action_wrapper.get_action_space_dim())
            self.logger.record_episode_start()

            for step in range(max_steps):
                # Choosing an action and passing it to our env.step() in order to act on our environment
                action = self.agent.step(obs, done, is_testing)
                # Take the action (a) and observe the outcome state (s') and reward (r)
                obs, default_reward, done = self.env.step(action)

                # The last allowed step always ends the episode
                is_last_step = step == max_steps - 1
                done = done or is_last_step

                # Checking whether or not to use the reward from the reward builder so we can pass that to the agent
                if reward_from_agent:
                    step_reward = self.agent.get_reward(
                        obs, default_reward, done)
                else:
                    step_reward = default_reward

                # Making the agent learn
                if not is_testing:
                    self.agent.learn(obs, step_reward, done)

                # Adding our step reward to the total count of the episode's reward
                ep_reward += step_reward
                ep_actions[self.agent.previous_action] += 1

                if done:
                    # An environment reward of exactly 1 on the final step
                    # is counted as a victory
                    victory = default_reward == 1
                    agent_info = {
                        "Learning rate": self.agent.model.learning_rate,
                        "Gamma": self.agent.model.gamma,
                        "Epsilon": self.agent.model.epsilon_greedy,
                    }
                    self.logger.record_episode(ep_reward, victory, step + 1,
                                               agent_info, ep_actions)
                    break

            self.logger.log_ep_stats()

            # Pause training and test the agent when requested
            if (not is_testing and self.do_reward_test
                    and current_episodes % self.episode_batch_avg_calculation
                    == 0):
                self.test_agent()

            # if this is not a test (evaluation), saving is enabled and we are in a multiple
            # of our save_every variable then we save the model and generate graphs
            if (not is_testing and self.enable_save and current_episodes > 0
                    and current_episodes % self.save_every == 0):
                self.save(self.full_save_path)
                self._save_pending_test_loggers()

        end_time = time.time()
        if is_testing:
            rp.report("\n> Test duration: {} seconds".format(end_time -
                                                             start_time))
        else:
            rp.report("\n> Training duration: {} seconds".format(end_time -
                                                                 start_time))
        self.logger.log_train_stats()

        # Saving the model at the end of the training loop
        if self.enable_save:
            if is_testing:
                self.logger.save(self.full_save_play_path)
                rp.save(self.full_save_play_path)
            else:
                self.save(self.full_save_path)
                self._save_pending_test_loggers()

    def _save_pending_test_loggers(self):
        """Save the loggers of in-training tests that are not saved yet.

        Does nothing unless ``do_reward_test`` is set and at least one test
        logger exists. Each unsaved logger is written below
        ``<full_save_path>/inside_training_play_files/
        test_at_training_episode_<episode>``, where the episode comes from
        ``self.logger.ep_avg_batch_rewards_episodes`` at the same index.
        ``self.full_save_path`` is changed temporarily because
        ``make_persistance_dirs`` is called without a path argument; it is
        restored even if saving fails.
        """
        if not (self.do_reward_test
                and len(self.inside_training_test_loggers) > 0):
            return

        base_save_path = self.full_save_path
        batch_episodes = self.logger.ep_avg_batch_rewards_episodes
        for idx in range(len(batch_episodes)):
            logger_dict = self.inside_training_test_loggers[idx]
            if logger_dict["saved"]:
                continue

            episode = batch_episodes[idx]
            self.full_save_path = base_save_path + os.path.sep + "inside_training_play_files" + os.path.sep + "test_at_training_episode_{}".format(
                episode)
            try:
                self.make_persistance_dirs(self.log_actions)
                logger_dict["logger"].save(self.full_save_path)
                logger_dict["saved"] = True
            finally:
                self.full_save_path = base_save_path
Пример #15
0
def cycle_through_syncs():
    # we start by reading the config file and preparing the connections to the databases
    my_report = Reporter()
    start_time = datetime.datetime.now()
    my_report.append_to_report('cycle started at ' + str(start_time))
    # read configuration file for usernames and passwords and other parameters
    config = readDictFile('odkoc.config')

    # initialise the oc-webservices
    myWebService = studySubjectWS(config['userName'], config['password'],
                                  config['baseUrl'])
    myEventWS = studyEventWS(config['userName'], config['password'],
                             config['baseUrl'])
    myDataWS = dataWS(config['userName'], config['password'],
                      config['baseUrl'])

    # create connections to the postgresql databases
    conn_util = ConnToOdkUtilDB()
    my_report.append_to_report('try to connect to util database, result: %s ' %
                               conn_util.init_result)
    conn_odk = ConnToOdkDB()
    my_report.append_to_report('try to connect to odk database, result: %s ' %
                               conn_odk.init_result)

    # our cycle starts here and ends at the break
    while True:
        '''
        start with form READER
        '''
        # 1: start with retrieving the rows of odk-table HS_RDT_READER_1_V1_CORE
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"HS_RDT_READER_1_V1_CORE\"")
        # for the study subject id look in:
        # odk_result['GENERAL_INFORMATION_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result[
                'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if (studysubjectid_oc == study_subject_id):
                    add_subject_to_db = False

            if (add_subject_to_db):
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                #print(add_results)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                #TODO: add errorhandling for fail of scheduling event

                # now add the combination id oid to the util database
                # only add the pair if the oid starts with SS_
                if (study_subject_oid.find('SS_') == 0):
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id)) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            # only import the data if this hasn't been done before
            if (not conn_util.UriComplete(odk_result['_URI'])):
                # now we should have the study subject id plus oid, so we can compose the odm for import
                study_subject_id = odk_result[
                    'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id))
                complete_odm = compose_reader(study_subject_oid, odk_result)

                import_results = myDataWS.importData(complete_odm)
                # if our import was successful, then the result should start with Success
                # and if so, we can mark this uri as complete
                if (import_results.find('Success') == 0):
                    conn_util.MarkUriComplete(odk_result['_URI'], 'reader')

                my_report.append_to_report('reader ' + study_subject_id +
                                           ': ' + import_results)
        '''
        go on with with form SCREENING
        '''
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"SCREEN19M__V3_CORE\"",
            'not \"INFORMED_CONSENT_STUDY_SUBJECT_ID\" is null')
        # for the study subject id look in:
        # odk_result['INFORMED_CONSENT_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result['INFORMED_CONSENT_STUDY_SUBJECT_ID']
            print(study_subject_id)
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if (studysubjectid_oc == study_subject_id):
                    add_subject_to_db = False

            if (add_subject_to_db):
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                #TODO: add errorhandling for fail of scheduling event

                # now add the combination id oid to the util database
                # only add the pair if the oid starts with SS_
                if (study_subject_oid.find('SS_') == 0):
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id)) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            print('we have study subject id %s and oid %s' %
                  (study_subject_id,
                   conn_util.DLookup(
                       'study_subject_oid', 'odkoc.study_subject_oc',
                       'study_subject_id=\'%s\'' % (study_subject_id))))
            print(odk_result['_URI'],
                  conn_util.UriComplete(odk_result['_URI']))
            # only import the data if this hasn't been done before
            if (not conn_util.UriComplete(odk_result['_URI'])):
                print('dive into it')
                # now we should have the study subject id plus oid, so we can compose the odm for import
                study_subject_id = odk_result[
                    'INFORMED_CONSENT_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id))
                complete_odm = compose_screening(study_subject_oid, odk_result)
                print(complete_odm)
                # we'll make an exception for I_MA006_OTHER_DISEASE_HX, because this is a group of check-boxes
                # in complete_odm we have a placeholder {OTHER_DISEASE_HX}
                parent_uri = odk_result['_URI']
                hx_results = conn_odk.ReadDataFromOdkTable(
                    "odk_prod.\"SCREEN19M__V3_MED_HISTO_CONCO_MED_OTHER_DISEASE_HX\"",
                    '\"_PARENT_AURI\"=\'%s\'' % (parent_uri))
                other_disease_hx = ''
                for hx in hx_results:
                    other_disease_hx = other_disease_hx + hx['VALUE'] + ','

                if (other_disease_hx != ''):
                    # take off the last comma
                    other_disease_hx = other_disease_hx[:-1]

                # finally we can replace the placeholder with the actual values
                complete_odm = complete_odm.replace('{OTHER_DISEASE_HX}',
                                                    other_disease_hx)

                # import the odm data
                import_results = myDataWS.importData(complete_odm)
                if (import_results.find('Success') != 0):
                    # if something went wrong, print it
                    print(import_results)
                    import_screening_core_success = False
                else:
                    # if our import was successful, then make a note of it
                    import_screening_core_success = True
                    my_report.append_to_report('screening ' +
                                               study_subject_id + ': ' +
                                               import_results)
                '''    
                now we can look at the repeating item group for miscarriages
                '''

                odk_misca_results = conn_odk.ReadDataFromOdkTable(
                    "odk_prod.\"SCREEN19M__V3_OBSETRIC_INFO_EXAM_BABY_DIED\"",
                    '\"_PARENT_AURI\"=\'%s\'' % (parent_uri))
                for misca in odk_misca_results:
                    # print('misca ' + parent_uri + ' ' + misca['_URI'])
                    complete_odm = compose_misca(study_subject_oid, misca)
                    import_results = myDataWS.importData(complete_odm)

                if (import_results.find('Success') != 0):
                    # if something went wrong, print it
                    print(import_results)
                    import_screening_misca_success = False
                else:
                    # if our import was successful, then make a note of it
                    import_screening_misca_success = True
                    my_report.append_to_report('misca ' + study_subject_id +
                                               ': ' + import_results)

                # now do the bookkeeping
                if (import_screening_core_success
                        and import_screening_misca_success):
                    conn_util.MarkUriComplete(odk_result['_URI'], 'screening')
        '''
        go on with with form LAMP
        '''
        odk_results = conn_odk.ReadDataFromOdkTable(
            "odk_prod.\"LAMP_TESTING_V1_CORE\"",
            'not \"GENERAL_INFORMATION_STUDY_SUBJECT_ID\" is null')
        # for the study subject id look in:
        # odk_result['GENERAL_INFORMATION_STUDY_SUBJECT_ID']

        # 2: create subject in oc, if necessary
        # retrieve all StudySubjectEvents from oc, using the webservice
        allStudySubjectsInOC = myWebService.getListStudySubjects(
            config['studyIdentifier'])

        for odk_result in odk_results:
            # check if StudySubjectID from odk is already in oc
            add_subject_to_db = True
            study_subject_id = odk_result[
                'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
            # compare with all oc subjects events
            for studysubjectid_oc in allStudySubjectsInOC:
                if (studysubjectid_oc == study_subject_id):
                    add_subject_to_db = False

            if (add_subject_to_db):
                # add study_subject_id to the oc
                add_results = myWebService.addStudySubject(
                    config['studyIdentifier'], config['siteIdentifier'],
                    study_subject_id)
                # TODO: add error-handling for fail of creating subject
                # and schedule the event
                study_subject_oid = myEventWS.scheduleEvent(
                    config['studyIdentifier'], study_subject_id,
                    config['studyEventOID'], 'def', '1980-01-01')
                #TODO: add errorhandling for fail of scheduling event

                # now add the combination id oid to the util database
                # only add the pair if the oid starts with SS_
                if (study_subject_oid.find('SS_') == 0):
                    conn_util.AddSubjectToDB(study_subject_oid,
                                             study_subject_id)

            # extra check: maybe we somehow missed the study subject oid and then there will be no record in table study_subject_oc
            if (conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id)) == ''):
                new_subject = PGSubject(study_subject_id)
                conn_util.AddSubjectToDB(new_subject.GetSSOID(),
                                         study_subject_id)

            # only import the data if this hasn't been done before
            if (not conn_util.UriComplete(odk_result['_URI'])):
                # now we should have the study subject id plus oid, so we can compose the odm for import
                study_subject_id = odk_result[
                    'GENERAL_INFORMATION_STUDY_SUBJECT_ID']
                study_subject_oid = conn_util.DLookup(
                    'study_subject_oid', 'odkoc.study_subject_oc',
                    'study_subject_id=\'%s\'' % (study_subject_id))
                complete_odm = compose_lamp(study_subject_oid, odk_result)

                import_results = myDataWS.importData(complete_odm)
                if (import_results.find('Success') != 0):
                    # if something went wrong, print it
                    print(import_results)
                    import_lamp_success = False
                else:
                    # if our import was successful, then make a note of it
                    import_lamp_success = True
                    my_report.append_to_report('lamp ' + study_subject_id +
                                               ': ' + import_results)

                if (import_lamp_success):
                    conn_util.MarkUriComplete(odk_result['_URI'], 'lamp')

        # some book keeping to check if we must continue looping, or break the loop
        # first sleep a bit, so we do not eat up all CPU
        time.sleep(int(config['sleep_this_long']))
        current_time = datetime.datetime.now()
        difference = current_time - start_time
        loop_this_long = config['loop_this_long']
        max_diff_list = loop_this_long.split(sep=':')
        max_difference = datetime.timedelta(hours=int(max_diff_list[0]),
                                            minutes=int(max_diff_list[1]),
                                            seconds=int(max_diff_list[2]))
        if difference > max_difference:
            break

    my_report.append_to_report('finished looping from %s till %s.' %
                               (start_time, current_time))
    # close the file so we can send it
    my_report.close_file()
Пример #16
0
    def old_train(self,
                  test_params: TestParams = None,
                  reward_from_agent=True):
        """Run the legacy training loop until the episode budget is used up.

        Each episode starts and resets the environment, resets the agent with
        the current episode number, and then steps the agent for at most
        ``self.max_steps_training`` steps, calling ``self.agent.learn`` after
        every step. Episode statistics are recorded on ``self.logger``.

        :param test_params: optional play-test configuration. When given, its
            ``logger`` attribute is pointed at the training logger, and every
            ``test_params.test_steps`` episodes ``self.play`` is called. If
            ``test_params.reward_threshold`` is set and the latest entry of
            ``test_params.logger.play_rewards_avg`` reaches it, training
            stops early.
        :param reward_from_agent: when True the step reward comes from
            ``self.agent.get_reward``; otherwise the reward returned by
            ``self.env.step`` is used unchanged.
        """
        start_time = time.time()

        rp.report("> Training")
        # A fresh logger is only built when nothing has been logged yet, so a
        # logger that already holds episodes is kept as-is.
        if self.logger.ep_count == 0:
            self.logger = Logger(
                self.max_training_episodes,
                self.agent.__class__.__name__,
                self.agent.model.__class__.__name__,
                self.agent.model,
                self.agent.action_wrapper.__class__.__name__,
                self.agent.action_wrapper.get_action_space_dim(),
                self.agent.action_wrapper.get_named_actions(),
                self.agent.state_builder.__class__.__name__,
                self.agent.reward_builder.__class__.__name__,
                self.env.__class__.__name__,
                log_actions=self.log_actions,
                episode_batch_avg_calculation=self.
                episode_batch_avg_calculation,
                rolling_avg_window_size=self.rolling_avg_window_size)

        if test_params is not None:
            test_params.logger = self.logger

        while self.curr_training_episodes < self.max_training_episodes:
            self.curr_training_episodes += 1
            self.env.start()

            # Reset the environment
            obs = self.env.reset()
            step_reward = 0
            done = False
            # Passing the episode to the agent reset, so that it can be passed to model reset
            # Allowing the model to track the episode number, and decide if it should diminish the
            # Learning Rate, depending on the currently selected strategy.
            self.agent.reset(self.curr_training_episodes)

            ep_reward = 0
            victory = False

            ep_actions = np.zeros(
                self.agent.action_wrapper.get_action_space_dim())
            self.logger.record_episode_start()

            for step in range(self.max_steps_training):

                # Choosing an action and passing it to our env.step() in order to act on our environment
                action = self.agent.step(obs, done, is_testing=False)
                obs, default_reward, done = self.env.step(action)

                # The step budget running out also terminates the episode.
                is_last_step = step == self.max_steps_training - 1
                done = done or is_last_step

                # Checking whether or not to use the reward from the reward builder so we can pass that to the agent
                if reward_from_agent:
                    step_reward = self.agent.get_reward(
                        obs, default_reward, done)
                else:
                    step_reward = default_reward

                # Making the agent learn
                self.agent.learn(obs, step_reward, done)

                # Adding our step reward to the total count of the episode's reward
                ep_reward += step_reward

                ep_actions[self.agent.previous_action] += 1

                if done:
                    # An environment reward of exactly 1 on the final step is
                    # what this loop counts as a victory.
                    victory = default_reward == 1
                    agent_info = {
                        "Learning rate": self.agent.model.learning_rate,
                        "Gamma": self.agent.model.gamma,
                        "Epsilon": self.agent.model.epsilon_greedy,
                    }
                    self.logger.record_episode(ep_reward, victory, step + 1,
                                               agent_info, ep_actions)
                    break

            self.logger.log_ep_stats()

            # Check if the user wants to pause training and test the agent.
            if self.do_reward_test and self.curr_training_episodes % self.episode_batch_avg_calculation == 0:
                self.test_agent()

            if self.enable_save and self.curr_training_episodes > 0 and self.curr_training_episodes % self.save_every == 0:
                self.save(self.full_save_path)
                self._save_pending_test_loggers()

            # The original guard read a local `episode` that was only bound
            # inside the logger-saving loop, so this branch could raise
            # UnboundLocalError (or silently use a stale value). The episode
            # counter is what the "not the very first episode" guard needs.
            if (test_params is not None
                    and self.curr_training_episodes % test_params.test_steps == 0
                    and self.curr_training_episodes != 0):
                test_params.current_ep_count = self.curr_training_episodes
                self.play(test_params.num_matches, test_params.max_steps,
                          test_params)

                # Stops training if reward threshold was reached in play testing
                if (test_params.reward_threshold is not None
                        and test_params.reward_threshold <=
                        test_params.logger.play_rewards_avg[-1]):
                    rp.report("> Reward threshold was reached!")
                    rp.report("> Stopping training")
                    break

        end_time = time.time()
        rp.report("\n> Training duration: {} seconds".format(end_time -
                                                             start_time))

        self.logger.log_train_stats()
        self.logger.plot_train_stats()
        # Saving the model when the training has ended
        if self.enable_save:
            self.save(self.full_save_path)
            self._save_pending_test_loggers()

    def _save_pending_test_loggers(self):
        """Save every in-training test logger not yet flagged as saved.

        The loggers are written here, in one go, instead of after every test,
        because saving them on each test slowed training down a lot.
        ``self.full_save_path`` is temporarily redirected to a per-episode
        sub-directory and always restored afterwards, even if saving fails.
        """
        if not (self.do_reward_test
                and len(self.inside_training_test_loggers) > 0):
            return

        base_save_path = self.full_save_path
        for idx, episode in enumerate(
                self.logger.ep_avg_batch_rewards_episodes):
            logger_dict = self.inside_training_test_loggers[idx]
            if logger_dict["saved"]:
                continue

            self.full_save_path = (
                base_save_path + os.path.sep + "inside_training_play_files" +
                os.path.sep + "test_at_training_episode_{}".format(episode))
            try:
                # presumably creates directories under self.full_save_path,
                # which is why the attribute is swapped -- TODO confirm
                self.make_persistance_dirs(self.log_actions)
                logger_dict["logger"].save(self.full_save_path)
                logger_dict["saved"] = True
            finally:
                self.full_save_path = base_save_path
Пример #17
0
    def old_play(self, test_params=None, reward_from_agent=True):
        """Run the legacy evaluation loop for ``self.max_test_episodes`` episodes.

        A new ``Logger`` replaces ``self.logger`` on every call. The agent is
        stepped with ``is_testing=True`` and ``self.agent.learn`` is never
        called, so this loop only records episode statistics.

        :param test_params: optional play-test configuration. When given, the
            episode rewards and victories are forwarded to
            ``test_params.logger.record_play_test``; when omitted, the train
            stats of the play logger are logged instead.
        :param reward_from_agent: when True the step reward comes from
            ``self.agent.get_reward``; otherwise the reward returned by
            ``self.env.step`` is used unchanged.
        """
        rp.report("\n\n> Playing")

        self.logger = Logger(
            self.max_test_episodes,
            self.agent.__class__.__name__,
            self.agent.model.__class__.__name__,
            self.agent.model,
            self.agent.action_wrapper.__class__.__name__,
            self.agent.action_wrapper.get_action_space_dim(),
            self.agent.action_wrapper.get_named_actions(),
            self.agent.state_builder.__class__.__name__,
            self.agent.reward_builder.__class__.__name__,
            self.env.__class__.__name__,
            log_actions=self.log_actions,
            episode_batch_avg_calculation=self.episode_batch_avg_calculation,
            rolling_avg_window_size=self.rolling_avg_window_size)

        while self.curr_playing_episodes < self.max_test_episodes:
            self.curr_playing_episodes += 1
            self.env.start()

            # Reset the environment
            obs = self.env.reset()
            step_reward = 0
            done = False
            # Passing the episode to the agent reset, so that it can be passed to model reset
            # Allowing the model to track the episode number, and decide if it should diminish the
            # Learning Rate, depending on the currently selected strategy.
            self.agent.reset(self.curr_playing_episodes)

            ep_reward = 0
            victory = False

            ep_actions = np.zeros(
                self.agent.action_wrapper.get_action_space_dim())
            self.logger.record_episode_start()

            for step in range(self.max_steps_testing):
                action = self.agent.step(obs, done, is_testing=True)
                # Take the action (a) and observe the outcome state(s') and reward (r)
                obs, default_reward, done = self.env.step(action)

                # The step budget running out also terminates the episode.
                is_last_step = step == self.max_steps_testing - 1
                done = done or is_last_step

                if reward_from_agent:
                    step_reward = self.agent.get_reward(
                        obs, default_reward, done)
                else:
                    step_reward = default_reward

                ep_reward += step_reward

                ep_actions[self.agent.previous_action] += 1

                # If done: finish episode
                if done:
                    # An environment reward of exactly 1 on the final step is
                    # what this loop counts as a victory.
                    victory = default_reward == 1
                    agent_info = {
                        "Learning rate": self.agent.model.learning_rate,
                        "Gamma": self.agent.model.gamma,
                        "Epsilon": self.agent.model.epsilon_greedy,
                    }
                    self.logger.record_episode(ep_reward, victory, step + 1,
                                               agent_info, ep_actions)
                    break

            self.logger.log_ep_stats()

        # Identity check is the idiomatic (and __ne__-proof) way to test for None.
        if test_params is not None:
            test_params.logger.record_play_test(test_params.current_ep_count,
                                                self.logger.ep_rewards,
                                                self.logger.victories,
                                                self.max_test_episodes)
        else:
            # Only logs train stats if this is not a test, to avoid cluttering the interface with info
            self.logger.log_train_stats()

        # We need to save playing status as well
        if self.enable_save:
            self.logger.save(self.full_save_play_path)
            rp.save(self.full_save_play_path)
Пример #18
0
    def __init__(self, args, sess, train_model, test_model):
        """
        Call the constructor of the base class
        init summaries
        init loading data
        :param args: run configuration; ``data_mode``, ``mode``, ``out_dir``,
            ``summary_dir``, ``num_classes`` and the data settings are read here
        :param sess: session whose graph is handed to the summary writer
        :param train_model: forwarded unchanged to the base-class constructor
        :param test_model: forwarded unchanged to the base-class constructor
        :return:
        """
        super().__init__(args, sess, train_model, test_model)
        ##################################################################################
        # Init summaries

        # Summary variables
        self.scalar_summary_tags = ['mean_iou_on_val',
                                    'train-loss-per-epoch', 'val-loss-per-epoch',
                                    'train-acc-per-epoch', 'val-acc-per-epoch']
        self.images_summary_tags = [
            ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])]
        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and it's operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph)
        ##################################################################################
        # Init load data and generator
        # Each data_mode pre-declares the attributes its loader fills in, runs
        # the loader, and selects the matching batch generator.
        self.generator = None
        if self.args.data_mode == "experiment_tfdata":
            self.data_session = None
            self.train_next_batch, self.train_data_len = self.init_tfdata(self.args.batch_size, self.args.abs_data_dir,
                                                                          (self.args.img_height, self.args.img_width),
                                                                          mode='train')
            self.num_iterations_training_per_epoch = self.train_data_len // self.args.batch_size
            self.generator = self.train_tfdata_generator
        elif self.args.data_mode == "experiment_h5":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data_h5()
            self.generator = self.train_h5_generator
        elif self.args.data_mode == "experiment_v2":
            self.targets_resize = self.args.targets_resize
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data(v2=True)
            self.generator = self.train_generator
        elif self.args.data_mode == "experiment":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data()
            self.generator = self.train_generator
        elif self.args.data_mode == "test_tfdata":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_tfdata_generator
        elif self.args.data_mode == "test":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_eval":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.names_mapper = None
            self.load_test_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_v2":
            self.targets_resize = self.args.targets_resize
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data(v2=True)
            self.generator = self.test_generator
        elif self.args.data_mode == "video":
            # Video input is handled as plain "test" mode from here on.
            self.args.data_mode = "test"
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_vid_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "debug":
            print("Debugging photo loading..")
            #            self.debug_x= misc.imread('/data/menna/cityscapes/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png')
            #            self.debug_y= misc.imread('/data/menna/cityscapes/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png')
            #            self.debug_x= np.expand_dims(misc.imresize(self.debug_x, (512,1024)), axis=0)
            #            self.debug_y= np.expand_dims(misc.imresize(self.debug_y, (512,1024)), axis=0)
            self.debug_x = np.load('data/debug/debug_x.npy')
            self.debug_y = np.load('data/debug/debug_y.npy')
            print("Debugging photo loaded")
        else:
            print("ERROR Please select a proper data_mode BYE")
            exit(-1)
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class
        # The former condition `mode == 'train' or 'overfit'` was always truthy
        # (a non-empty string is true), so the test report was unreachable and
        # test runs wrote into report_train.json. Only 'test' is special-cased;
        # every other mode keeps the training report it has always received.
        if self.args.mode == 'test':
            report_file = 'report_test.json'
        else:
            report_file = 'report_train.json'
        self.reporter = Reporter(self.args.out_dir + report_file, self.args)
Пример #19
0
#!/usr/bin/env python3

from utils.reporter import Reporter

if __name__ == '__main__':
    # Script entry point: build a reporter and fire its test-success report.
    Reporter().report_test_success()
Пример #20
0
#!/usr/bin/env python3

from utils.reporter import Reporter


if __name__ == '__main__':
    # Script entry point: build a reporter and fire its release-success report.
    Reporter().report_release_success()
Пример #21
0
class Train(BasicTrain):
    """
    Trainer class
    """

    def __init__(self, args, sess, train_model, test_model):
        """
        Call the constructor of the base class
        init summaries
        init loading data
        :param args: run configuration; ``data_mode``, ``mode``, ``out_dir``,
            ``summary_dir``, ``num_classes`` and the data settings are read here
        :param sess: session whose graph is handed to the summary writer
        :param train_model: forwarded unchanged to the base-class constructor
        :param test_model: forwarded unchanged to the base-class constructor
        :return:
        """
        super().__init__(args, sess, train_model, test_model)
        ##################################################################################
        # Init summaries

        # Summary variables
        self.scalar_summary_tags = ['mean_iou_on_val',
                                    'train-loss-per-epoch', 'val-loss-per-epoch',
                                    'train-acc-per-epoch', 'val-acc-per-epoch']
        self.images_summary_tags = [
            ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])]
        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and it's operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph)
        ##################################################################################
        # Init load data and generator
        # Each data_mode pre-declares the attributes its loader fills in, runs
        # the loader, and selects the matching batch generator.
        self.generator = None
        if self.args.data_mode == "experiment_tfdata":
            self.data_session = None
            self.train_next_batch, self.train_data_len = self.init_tfdata(self.args.batch_size, self.args.abs_data_dir,
                                                                          (self.args.img_height, self.args.img_width),
                                                                          mode='train')
            self.num_iterations_training_per_epoch = self.train_data_len // self.args.batch_size
            self.generator = self.train_tfdata_generator
        elif self.args.data_mode == "experiment_h5":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data_h5()
            self.generator = self.train_h5_generator
        elif self.args.data_mode == "experiment_v2":
            self.targets_resize = self.args.targets_resize
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data(v2=True)
            self.generator = self.train_generator
        elif self.args.data_mode == "experiment":
            self.train_data = None
            self.train_data_len = None
            self.val_data = None
            self.val_data_len = None
            self.num_iterations_training_per_epoch = None
            self.num_iterations_validation_per_epoch = None
            self.load_train_data()
            self.generator = self.train_generator
        elif self.args.data_mode == "test_tfdata":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_tfdata_generator
        elif self.args.data_mode == "test":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_eval":
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.names_mapper = None
            self.load_test_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "test_v2":
            self.targets_resize = self.args.targets_resize
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_val_data(v2=True)
            self.generator = self.test_generator
        elif self.args.data_mode == "video":
            # Video input is handled as plain "test" mode from here on.
            self.args.data_mode = "test"
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_vid_data()
            self.generator = self.test_generator
        elif self.args.data_mode == "debug":
            print("Debugging photo loading..")
            #            self.debug_x= misc.imread('/data/menna/cityscapes/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png')
            #            self.debug_y= misc.imread('/data/menna/cityscapes/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png')
            #            self.debug_x= np.expand_dims(misc.imresize(self.debug_x, (512,1024)), axis=0)
            #            self.debug_y= np.expand_dims(misc.imresize(self.debug_y, (512,1024)), axis=0)
            self.debug_x = np.load('data/debug/debug_x.npy')
            self.debug_y = np.load('data/debug/debug_y.npy')
            print("Debugging photo loaded")
        else:
            print("ERROR Please select a proper data_mode BYE")
            exit(-1)
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class
        # The former condition `mode == 'train' or 'overfit'` was always truthy
        # (a non-empty string is true), so the test report was unreachable and
        # test runs wrote into report_train.json. Only 'test' is special-cased;
        # every other mode keeps the training report it has always received.
        if self.args.mode == 'test':
            report_file = 'report_test.json'
        else:
            report_file = 'report_train.json'
        self.reporter = Reporter(self.args.out_dir + report_file, self.args)
        ##################################################################################

    def crop(self):
        """Split every validation sample into a left and a right half.

        ``self.val_data`` is replaced by a dict whose arrays hold twice as many
        samples at half the size along axis 2: sample ``i`` becomes entries
        ``2*i`` (left half) and ``2*i + 1`` (right half) in both 'X' and 'Y'.
        """
        images = self.val_data['X']
        labels = self.val_data['Y']
        shape = images.shape
        count = shape[0]
        half = shape[2] // 2

        split_x = np.zeros((count * 2, shape[1], half, shape[3]), images.dtype)
        split_y = np.zeros((count * 2, shape[1], half), labels.dtype)

        for sample in range(count):
            left_slot = sample * 2
            right_slot = left_slot + 1
            split_x[left_slot, :, :, :] = images[sample, :, :half, :]
            split_x[right_slot, :, :, :] = images[sample, :, half:, :]
            split_y[left_slot, :, :] = labels[sample, :, :half]
            split_y[right_slot, :, :] = labels[sample, :, half:]

        self.val_data = {'X': split_x, 'Y': split_y}

    def init_tfdata(self, batch_size, main_dir, resize_shape, mode='train'):
        """Create the training-data iterator and load validation arrays.

        Opens a dedicated ``tf.Session`` (kept on ``self.data_session``),
        builds a ``SegDataLoader`` on the CPU device, initialises an iterator
        over its ``data_tr`` dataset, then loads ``X_val.npy`` / ``Y_val.npy``
        from ``self.args.data_dir`` into ``self.val_data``.

        :param batch_size: batch size handed to the data loader
        :param main_dir: data directory handed to the data loader
        :param resize_shape: indexable whose first two entries form the
            loader's size argument; it is also passed on unchanged
        :param mode: accepted but not read anywhere in this method
        :return: ``(next_batch, data_len)`` -- the iterator's next-element op
            and the loader's ``data_len``
        """
        self.data_session = tf.Session()
        print("Creating the iterator for training data")
        with tf.device('/cpu:0'):
            target_size = (resize_shape[0], resize_shape[1])
            loader = SegDataLoader(main_dir, batch_size, target_size, resize_shape,
                                   'data/cityscapes_tfdata/train.txt')
            train_set = loader.data_tr
            iterator = Iterator.from_structure(train_set.output_types, train_set.output_shapes)
            next_batch = iterator.get_next()

            self.init_op = iterator.make_initializer(train_set)
            self.data_session.run(self.init_op)

        print("Loading Validation data in memoryfor faster training..")
        data_dir = self.args.data_dir
        val_x = np.load(data_dir + "X_val.npy")
        val_y = np.load(data_dir + "Y_val.npy")
        self.val_data = {'X': val_x, 'Y': val_y}
        # self.crop() was called here in an earlier revision (now disabled).

        # Drop the trailing partial batch so validation only sees full batches;
        # note this uses self.args.batch_size, not the batch_size argument.
        sample_count = val_x.shape[0]
        self.val_data_len = sample_count - sample_count % self.args.batch_size
        self.num_iterations_validation_per_epoch = self.val_data_len // self.args.batch_size

        print("Val-shape-x -- {} {}".format(val_x.shape, self.val_data_len))
        print("Val-shape-y -- {}".format(val_y.shape))
        print("Num of iterations on validation data in one epoch -- {}".format(
            self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

        return next_batch, loader.data_len

    @timeit
    def load_overfit_data(self):
        """
        Load the training arrays and reuse them as validation data.

        X_train.npy / Y_train.npy are read from self.args.data_dir. Both the
        training and the validation length are truncated to a multiple of
        self.args.batch_size, and self.val_data refers to the very same dict
        as self.train_data.
        :return:
        """
        print("Loading data..")
        data_dir = self.args.data_dir
        batch = self.args.batch_size

        self.train_data = {'X': np.load(data_dir + "X_train.npy"),
                           'Y': np.load(data_dir + "Y_train.npy")}
        num_train = self.train_data['X'].shape[0]
        self.train_data_len = num_train - num_train % batch
        self.num_iterations_training_per_epoch = (self.train_data_len + batch - 1) // batch
        print("Train-shape-x -- " + str(self.train_data['X'].shape))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Overfitting data is loaded")

        print("Loading Validation data..")
        # validation runs on the training samples themselves
        self.val_data = self.train_data
        num_val = self.val_data['X'].shape[0]
        self.val_data_len = num_val - num_val % batch
        self.num_iterations_validation_per_epoch = (self.val_data_len + batch - 1) // batch
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    def overfit_generator(self):
        """
        Endlessly yield (x_batch, y_batch) mini-batches from self.train_data.

        At the start of every pass a new index order over the first
        self.train_data_len samples is built: a random order without
        replacement when self.args.shuffle is set, the natural order otherwise.
        :return: generator of (x_batch, y_batch) tuples
        """
        while True:
            # a fresh index order is drawn at the start of every pass
            if self.args.shuffle:
                order = np.random.choice(self.train_data_len, self.train_data_len, replace=False)
            else:
                order = np.arange(self.train_data_len)

            cursor = 0
            epoch_done = False
            while not epoch_done:
                picks = order[cursor:cursor + self.args.batch_size]
                batch = (self.train_data['X'][picks], self.train_data['Y'][picks])

                # decide before yielding whether the next batch starts a new pass
                cursor += self.args.batch_size
                epoch_done = cursor >= self.train_data_len

                yield batch

    def init_summaries(self):
        """
        Create the summary part of the graph.

        For every tag in self.scalar_summary_tags a float32 placeholder and a
        scalar summary op are created; for every (tag, shape) pair in
        self.images_summary_tags a float32 placeholder of that shape and an
        image summary op (max_outputs=10) are created. Placeholders and ops are
        stored in self.summary_placeholders / self.summary_ops under the tag,
        and the tag itself is recorded in self.summary_tags.
        :return:
        """
        with tf.variable_scope('train-summary-per-epoch'):
            for tag in self.scalar_summary_tags:
                # append() keeps the tag as one entry; `+= tag` on a list would
                # add the tag's characters one by one.
                # NOTE(review): assumes self.summary_tags is a list - confirm
                # against where it is initialised.
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag)
                self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag])
            for tag, shape in self.images_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag)
                self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10)

    def add_summary(self, step, summaries_dict=None, summaries_merged=None):
        """
        Write summaries to the summary writer at the given step.

        :param step: step value passed to every add_summary call
        :param summaries_dict: optional mapping tag -> value; each value is fed
                               to the tag's placeholder, the tag's summary op
                               is evaluated and the result is written
        :param summaries_merged: optional, already evaluated summary that is
                                 written as is
        :return:
        """
        if summaries_dict is not None:
            run = self.sess.run
            fetches = [self.summary_ops[tag] for tag in summaries_dict]
            feeds = {self.summary_placeholders[tag]: value
                     for tag, value in summaries_dict.items()}
            for evaluated in run(fetches, feeds):
                self.summary_writer.add_summary(evaluated, step)

        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)

    @timeit
    def load_train_data(self, v2=False):
        """
        Load the training and validation arrays from self.args.data_dir.

        Training data is read from X_train.npy / Y_train.npy and passed through
        self.resize(). Validation data is read from X_val.npy / Y_val.npy and
        the labels as loaded are additionally kept under the 'Y_large' key.
        Sample counts and iterations per epoch are stored on self; the
        validation length is truncated to a multiple of self.args.batch_size.

        :param v2: when True, the label maps of both splits are shrunk by the
                   integer factor self.targets_resize with nearest-neighbour
                   interpolation
        :return:
        """
        print("Loading Training data..")
        self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"),
                           'Y': np.load(self.args.data_dir + "Y_train.npy")}
        self.train_data = self.resize(self.train_data)

        if v2:
            out_shape = (self.train_data['Y'].shape[1] // self.targets_resize,
                         self.train_data['Y'].shape[2] // self.targets_resize)
            yy = np.zeros((self.train_data['Y'].shape[0], out_shape[0], out_shape[1]), dtype=self.train_data['Y'].dtype)
            for y in range(self.train_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.train_data['Y'][y, ...], out_shape, interp='nearest')
            self.train_data['Y'] = yy
        self.train_data_len = self.train_data['X'].shape[0]

        batch_size = self.args.batch_size
        # ceil division: a trailing partial batch still counts as an iteration
        self.num_iterations_training_per_epoch = (self.train_data_len + batch_size - 1) // batch_size

        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"),
                         'Y': np.load(self.args.data_dir + "Y_val.npy")}
        # keep the labels as loaded before they are (optionally) shrunk below
        self.val_data['Y_large'] = self.val_data['Y']
        if v2:
            out_shape = (self.val_data['Y'].shape[1] // self.targets_resize,
                         self.val_data['Y'].shape[2] // self.targets_resize)
            # the buffer takes the dtype of the validation labels it is built
            # from, not the dtype of the training labels
            yy = np.zeros((self.val_data['Y'].shape[0], out_shape[0], out_shape[1]), dtype=self.val_data['Y'].dtype)
            for y in range(self.val_data['Y'].shape[0]):
                yy[y, ...] = misc.imresize(self.val_data['Y'][y, ...], out_shape, interp='nearest')
            self.val_data['Y'] = yy

        # only whole batches are used for validation
        self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % batch_size
        self.num_iterations_validation_per_epoch = (self.val_data_len + batch_size - 1) // batch_size
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_train_data_h5(self):
        """
        Open the HDF5 training file and load the validation arrays.

        The training file self.args.h5_train_file is opened read-only from
        self.args.data_dir and its length is taken from self.args.h5_train_len.
        Validation data comes from X_val.npy / Y_val.npy and its length is
        truncated to a multiple of self.args.batch_size.
        :return:
        """
        data_dir = self.args.data_dir
        batch = self.args.batch_size

        print("Loading Training data..")
        self.train_data = h5py.File(data_dir + self.args.h5_train_file, 'r')
        self.train_data_len = self.args.h5_train_len
        self.num_iterations_training_per_epoch = (self.train_data_len + batch - 1) // batch
        print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len))
        print("Train-shape-y -- " + str(self.train_data['Y'].shape))
        print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch))
        print("Training data is loaded")

        print("Loading Validation data..")
        self.val_data = {'X': np.load(data_dir + "X_val.npy"),
                         'Y': np.load(data_dir + "Y_val.npy")}
        num_val = self.val_data['X'].shape[0]
        # only whole batches are used for validation
        self.val_data_len = num_val - num_val % batch
        self.num_iterations_validation_per_epoch = (self.val_data_len + batch - 1) // batch
        print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len))
        print("Val-shape-y -- " + str(self.val_data['Y'].shape))
        print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch))
        print("Validation data is loaded")

    @timeit
    def load_vid_data(self):
        """
        Load the video frames from X_vid.npy as test data.

        No labels are read: self.test_data['Y'] is filled with zeros shaped
        like the first three axes of the frames array.
        :return:
        """
        print("Loading Video data..")
        frames = np.load(self.args.data_dir + "X_vid.npy")
        self.test_data = {'X': frames}
        self.test_data['Y'] = np.zeros(frames.shape[:3])
        self.test_data_len = frames.shape[0]
        print("Vid-shape-x -- " + str(self.test_data['X'].shape))
        print("Vid-shape-y -- " + str(self.test_data['Y'].shape))

        batch = self.args.batch_size
        # ceil division: a trailing partial batch still counts as an iteration
        self.num_iterations_testing_per_epoch = (self.test_data_len + batch - 1) // batch
        print("Video data is loaded")

    @timeit
    def load_val_data(self, v2=False):
        """
        Load the validation arrays as test data.

        X_val.npy / Y_val.npy are read from self.args.data_dir and passed
        through self.resize(); the resized labels are also kept under
        'Y_large'. The test length is truncated to a multiple of
        self.args.batch_size.

        :param v2: when True, the labels are shrunk by the integer factor
                   self.targets_resize with nearest-neighbour interpolation
        :return:
        """
        print("Loading Validation data..")
        data_dir = self.args.data_dir
        self.test_data = {'X': np.load(data_dir + "X_val.npy"),
                          'Y': np.load(data_dir + "Y_val.npy")}
        self.test_data = self.resize(self.test_data)
        self.test_data['Y_large'] = self.test_data['Y']

        if v2:
            labels = self.test_data['Y']
            factor = self.targets_resize
            out_shape = (labels.shape[1] // factor, labels.shape[2] // factor)
            shrunk = np.zeros((labels.shape[0],) + out_shape, dtype=labels.dtype)
            for i in range(labels.shape[0]):
                shrunk[i, ...] = misc.imresize(labels[i, ...], out_shape, interp='nearest')
            self.test_data['Y'] = shrunk

        num_samples = self.test_data['X'].shape[0]
        batch = self.args.batch_size
        # only whole batches are evaluated
        self.test_data_len = num_samples - num_samples % batch
        print("Validation-shape-x -- " + str(self.test_data['X'].shape))
        print("Validation-shape-y -- " + str(self.test_data['Y'].shape))
        self.num_iterations_testing_per_epoch = (self.test_data_len + batch - 1) // batch
        print("Validation data is loaded")

    @timeit
    def load_test_data(self):
        """
        Load the test images together with the file-name arrays.

        Only 'X' is stored in self.test_data; xnames_test.npy and
        ynames_test.npy end up in self.names_mapper. The test length is
        truncated to a multiple of self.args.batch_size.
        :return:
        """
        print("Loading Testing data..")
        data_dir = self.args.data_dir
        batch = self.args.batch_size

        self.test_data = {'X': np.load(data_dir + "X_test.npy")}
        self.names_mapper = {'X': np.load(data_dir + "xnames_test.npy"),
                             'Y': np.load(data_dir + "ynames_test.npy")}

        num_samples = self.test_data['X'].shape[0]
        self.test_data_len = num_samples - num_samples % batch
        print("Test-shape-x -- " + str(self.test_data['X'].shape))
        self.num_iterations_testing_per_epoch = (self.test_data_len + batch - 1) // batch
        print("Test data is loaded")

    def test_generator(self):
        """
        Endlessly yield (x_batch, y_batch) mini-batches from self.test_data.

        At the start of every pass a new index order over the first
        self.test_data_len samples is built: a random order without
        replacement when self.args.shuffle is set, the natural order otherwise.
        :return: generator of (x_batch, y_batch) tuples
        """
        while True:
            # a fresh index order is drawn at the start of every pass
            if self.args.shuffle:
                order = np.random.choice(self.test_data_len, self.test_data_len, replace=False)
            else:
                order = np.arange(self.test_data_len)

            cursor = 0
            epoch_done = False
            while not epoch_done:
                picks = order[cursor:cursor + self.args.batch_size]
                batch = (self.test_data['X'][picks], self.test_data['Y'][picks])

                # decide before yielding whether the next batch starts a new pass
                cursor += self.args.batch_size
                epoch_done = cursor >= self.test_data_len

                yield batch

    def train_generator(self):
        """
        Yield one pass of (x_batch, y_batch) mini-batches from self.train_data.

        The sample indices are drawn once, with replacement, so that
        self.num_iterations_training_per_epoch full batches are available. The
        generator finishes after the batch that moves the cursor to or past
        self.train_data_len.
        :return: generator of (x_batch, y_batch) tuples
        """
        sampled = np.random.choice(self.train_data_len,
                                   self.num_iterations_training_per_epoch * self.args.batch_size,
                                   replace=True)
        cursor = 0
        exhausted = False
        while not exhausted:
            picks = sampled[cursor:cursor + self.args.batch_size]
            batch = (self.train_data['X'][picks], self.train_data['Y'][picks])
            cursor += self.args.batch_size

            yield batch

            # the end-of-pass check happens only after the batch was handed out
            exhausted = cursor >= self.train_data_len

    def train_tfdata_generator(self):
        """
        Endlessly yield batches pulled from the tf.data iterator.

        Every step evaluates self.train_next_batch in self.data_session and
        yields the inputs together with index 0 of the targets' last axis.
        :return: generator of (x_batch, y_batch) tuples
        """
        with tf.device('/cpu:0'):
            while True:
                images, targets = self.data_session.run(self.train_next_batch)
                # only the first entry of the last target axis is handed on
                first_channel = targets[:, :, :, 0]
                yield images, first_channel

    def train_h5_generator(self):
        """
        Yield one pass of (x_batch, y_batch) mini-batches from self.train_data.

        The sample order is a random permutation drawn once per call. Every
        batch, including the last (possibly shorter) one, is yielded, so a pass
        produces ceil(train_data_len / batch_size) batches, the same count that
        train_generator() produces. The previous version checked for the end
        of the data before yielding and therefore always dropped the final batch.
        :return: generator of (x_batch, y_batch) tuples
        """
        start = 0
        idx = np.random.choice(self.train_data_len, self.train_data_len,
                               replace=False)
        while start < self.train_data_len:
            # the indices of one batch are sorted before they are used;
            # presumably because h5py point selection expects increasing
            # indices - TODO confirm
            mask = sorted(idx[start:start + self.args.batch_size].tolist())
            x_batch = self.train_data['X'][mask]
            y_batch = self.train_data['Y'][mask]

            # update start idx
            start += self.args.batch_size

            yield x_batch, y_batch

    def resize(self, data):
        """
        Resize every sample of data['X'] and data['Y'] to the configured size.

        The target size is (self.args.img_height, self.args.img_width); 'Y'
        samples are resized with 'nearest' interpolation.

        :param data: dict with array entries 'X' and 'Y', indexed along axis 0
        :return: the same dict, with 'X' and 'Y' replaced by the resized stacks
        """
        target = (self.args.img_height, self.args.img_width)
        count = data['X'].shape[0]

        resized_x = [misc.imresize(data['X'][i, ...], target) for i in range(count)]
        resized_y = [misc.imresize(data['Y'][i, ...], target, 'nearest') for i in range(count)]

        data['X'] = np.asarray(resized_x)
        data['Y'] = np.asarray(resized_y)
        return data

    def train(self):
        """
        Run the training loop up to self.args.num_epochs.

        The first epoch number is the value of the model's global epoch tensor
        plus one. Every epoch pulls batches from self.generator(), runs the
        train op and collects loss and accuracy. On the last iteration of an
        epoch (counted against self.num_iterations_training_per_epoch) the
        epoch means are reported through self.reporter, the global step and
        global epoch tensors are updated and the batch loop is left. After the
        batch loop the model is saved every self.args.save_every epochs and
        validated every self.args.test_every epochs.
        :return:
        """
        print("Training mode will begin NOW ..")
        # curr_lr= self.model.args.learning_rate
        for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1):

            # init tqdm and get the epoch value
            tt = tqdm(self.generator(), total=self.num_iterations_training_per_epoch,
                      desc="epoch-" + str(cur_epoch) + "-")

            # init the current iterations
            cur_iteration = 0

            # init acc and loss lists
            loss_list = []
            acc_list = []

            # loop by the number of iterations
            for x_batch, y_batch in tt:

                # get the cur_it for the summary
                cur_it = self.model.global_step_tensor.eval(self.sess)

                # Feed this variables to the network
                feed_dict = {self.model.x_pl: x_batch,
                             self.model.y_pl: y_batch,
                             self.model.is_training: True
                             #                             self.model.curr_learning_rate:curr_lr
                             }

                # Every iteration but the last one only trains and logs; the last
                # iteration additionally finalizes the epoch.
                # NOTE(review): the finalizing branch is only reached when the
                # generator yields at least num_iterations_training_per_epoch
                # batches - confirm this holds for every generator in use.
                if cur_iteration < self.num_iterations_training_per_epoch - 1:

                    # run the feed_forward
                    _, loss, acc, summaries_merged = self.sess.run(
                        [self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries],
                        feed_dict=feed_dict)
                    # log loss and acc
                    loss_list += [loss]
                    acc_list += [acc]
                    # summarize (writing the merged summaries is currently disabled)
                #                    self.add_summary(cur_it, summaries_merged=summaries_merged)

                else:
                    # run the feed_forward; the segmented image summary is only
                    # fetched outside of the 'experiment_v2' data mode
                    if self.args.data_mode == 'experiment_v2':
                        _, loss, acc, summaries_merged = self.sess.run(
                            [self.model.train_op, self.model.loss, self.model.accuracy,
                             self.model.merged_summaries],
                            feed_dict=feed_dict)
                    else:
                        _, loss, acc, summaries_merged, segmented_imgs = self.sess.run(
                            [self.model.train_op, self.model.loss, self.model.accuracy,
                             self.model.merged_summaries, self.model.segmented_summary],
                            feed_dict=feed_dict)

                    # log loss and acc, then average over the whole epoch
                    loss_list += [loss]
                    acc_list += [acc]
                    total_loss = np.mean(loss_list)
                    total_acc = np.mean(acc_list)
                    # summarize (the dict is built but writing it is currently disabled)
                    summaries_dict = dict()
                    summaries_dict['train-loss-per-epoch'] = total_loss
                    summaries_dict['train-acc-per-epoch'] = total_acc

                    if self.args.data_mode != 'experiment_v2':
                        summaries_dict['train_prediction_sample'] = segmented_imgs
                    # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged)

                    # report
                    self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc))
                    self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss))
                    self.reporter.finalize()

                    # Update the Global step
                    self.model.global_step_assign_op.eval(session=self.sess,
                                                          feed_dict={self.model.global_step_input: cur_it + 1})

                    # Update the Cur Epoch tensor
                    # it is the last thing because if it is interrupted it repeat this
                    # NOTE(review): the stored value is cur_epoch + 1 while the epoch
                    # range above starts at the stored value + 1 - confirm that a
                    # resumed run does not skip an epoch.
                    self.model.global_epoch_assign_op.eval(session=self.sess,
                                                           feed_dict={self.model.global_epoch_input: cur_epoch + 1})

                    # print in console
                    tt.close()
                    print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[
                                                                                                        :6])

                    # Break the loop to finalize this epoch
                    break

                # Update the Global step (not reached on the last iteration, which
                # updates it before the break above)
                self.model.global_step_assign_op.eval(session=self.sess,
                                                      feed_dict={self.model.global_step_input: cur_it + 1})

                # update the cur_iteration
                cur_iteration += 1

            # Save the current checkpoint
            if cur_epoch % self.args.save_every == 0:
                self.save_model()

            # Test the model on validation
            if cur_epoch % self.args.test_every == 0:
                self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess),
                                    epoch=self.model.global_epoch_tensor.eval(self.sess))
        #            if cur_epoch % self.args.learning_decay_every == 0:
        #                curr_lr= curr_lr*self.args.learning_decay
        #                print('Current learning rate is ', curr_lr)

        print("Training Finished")

    def test_per_epoch(self, step, epoch):
        """
        Evaluate the model on self.val_data and keep track of the best mean IoU.

        The validation arrays are walked in consecutive slices of
        self.args.batch_size for self.num_iterations_validation_per_epoch
        iterations. Accuracy and inference time are collected per batch and
        the metrics object is updated with every prediction. On the last
        iteration the results are reported through self.reporter, and when the
        mean IoU exceeds the value stored in the model's best-IoU tensor the
        best model is saved and that tensor is updated.

        :param step: global step, only used in the console output
        :param epoch: epoch number used in the console output and report keys
        :return:
        """
        print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..")

        # init tqdm and get the epoch value
        tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch,
                  desc="Val-epoch-" + str(epoch) + "-")

        # init acc and loss lists
        loss_list = []
        acc_list = []
        inf_list = []

        # idx of minibatch
        idx = 0

        # reset metrics
        self.metrics.reset()

        # get the maximum iou to compare with and save the best model
        max_iou = self.model.best_iou_tensor.eval(self.sess)

        # loop by the number of iterations
        for cur_iteration in tt:
            # load minibatches
            x_batch = self.val_data['X'][idx:idx + self.args.batch_size]
            y_batch = self.val_data['Y'][idx:idx + self.args.batch_size]
            if self.args.data_mode == 'experiment_v2':
                y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size]

            # update idx of minibatch
            idx += self.args.batch_size

            # Feed this variables to the network
            feed_dict = {self.model.x_pl: x_batch,
                         self.model.y_pl: y_batch,
                         self.model.is_training: False
                         }

            # Every iteration but the last one only evaluates and logs; the last
            # iteration additionally finalizes the validation run.
            if cur_iteration < self.num_iterations_validation_per_epoch - 1:

                start = time.time()
                # run the feed_forward

                out_argmax, loss, acc, summaries_merged = self.sess.run(
                    [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries],
                    feed_dict=feed_dict)

                end = time.time()
                # log loss and acc
                loss_list += [loss]
                acc_list += [acc]
                inf_list += [end - start]
                if self.args.data_mode == 'experiment_v2':
                    # resize the prediction to the size of the large labels so
                    # that the metrics are computed against y_batch_large
                    yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]),
                                  dtype=np.uint32)
                    out_argmax = np.asarray(out_argmax, dtype=np.uint8)
                    for y in range(out_argmax.shape[0]):
                        yy[y, ...] = misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest')
                    y_batch = y_batch_large
                    out_argmax = yy

                # log metrics
                self.metrics.update_metrics_batch(out_argmax, y_batch)

            else:
                start = time.time()
                # run the feed_forward
                # NOTE(review): this branch fetches from self.test_model while the
                # feed_dict is keyed by self.model placeholders and the branch above
                # fetches from self.model - confirm both share the same tensors.
                if self.args.data_mode == 'experiment_v2':  # Issues in concatenating gt and img with diff sizes now for segmented_imgs
                    out_argmax, acc = self.sess.run(
                        [self.test_model.out_argmax, self.test_model.accuracy],
                        feed_dict=feed_dict)
                else:
                    out_argmax, acc, segmented_imgs = self.sess.run(
                        [self.test_model.out_argmax, self.test_model.accuracy, self.test_model.segmented_summary],
                        feed_dict=feed_dict)

                end = time.time()
                # log acc and inference time (no loss is fetched in this branch)
                acc_list += [acc]
                inf_list += [end - start]
                # log metrics
                # NOTE(review): unlike the branch above, the 'experiment_v2'
                # prediction is not resized to y_batch_large here - confirm intended.
                self.metrics.update_metrics_batch(out_argmax, y_batch)
                # mean over batches
                total_acc = np.mean(acc_list)
                mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch)
                mean_iou_arr = self.metrics.iou
                mean_inference = str(np.mean(inf_list)) + '-seconds'
                # summarize (the dict is built but writing it is currently disabled)
                summaries_dict = dict()
                summaries_dict['val-acc-per-epoch'] = total_acc
                summaries_dict['mean_iou_on_val'] = mean_iou
                if self.args.data_mode != 'experiment_v2':  # Issues in concatenating gt and img with diff sizes now for segmented_imgs
                    summaries_dict['val_prediction_sample'] = segmented_imgs
                #                self.add_summary(step, summaries_dict=summaries_dict, summaries_merged=summaries_merged)

                # report
                self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc))
                self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch),
                                                           str(mean_inference))
                self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr)
                self.reporter.finalize()

                # print in console
                tt.close()
                print("Val-epoch-" + str(epoch) + "-" +
                      "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou))
                print("Last_max_iou: " + str(max_iou))
                if mean_iou > max_iou:
                    print("This validation got a new best iou. so we will save this one")
                    # save the best model
                    self.save_best_model()
                    # Set the new maximum
                    self.model.best_iou_assign_op.eval(session=self.sess,
                                                       feed_dict={self.model.best_iou_input: mean_iou})
                else:
                    print("hmm not the best validation epoch :/..")
                # Break the loop to finalize this epoch
                break

    def linknet_postprocess(self, gt):
        """
        Shift every label down by one and map inputs equal to -1 to 19.

        :param gt: label array; it is not modified, a shifted copy is returned
        :return: gt - 1, with 19 written wherever gt itself equals -1
        """
        shifted = gt - 1
        # NOTE(review): the mask is taken on the unshifted input, so an input
        # label of 0 comes out as -1 - confirm that `shifted == -1` was not
        # the intended condition.
        remap_mask = (gt == -1)
        shifted[remap_mask] = 19
        return shifted

    def test(self, pkl=False):
        """
        Run the test set one image at a time and print accuracy and mean IoU.

        :param pkl: when False the best checkpoint is loaded first; when True
                    no checkpoint is loaded and each prediction is passed
                    through linknet_postprocess() and re-colored with
                    decode_labels()
        :return:
        """
        print("Testing mode will begin NOW..")

        # load the best model checkpoint to test on it
        if not pkl:
            self.load_best_model()

        # init tqdm and get the epoch value
        tt = tqdm(range(self.test_data_len))
        # naming = np.load(self.args.data_dir + 'names_train.npy')

        # init acc and loss lists
        acc_list = []
        # NOTE(review): img_list is never appended to in this method, so the
        # plotting loop at the end does not write anything.
        img_list = []

        # idx of image
        idx = 0

        # reset metrics
        self.metrics.reset()

        # loop by the number of iterations
        for cur_iteration in tt:
            # load mini_batches (a single image per step)
            x_batch = self.test_data['X'][idx:idx + 1]
            y_batch = self.test_data['Y'][idx:idx + 1]
            if self.args.data_mode == 'test_v2':
                y_batch_large = self.test_data['Y_large'][idx:idx + 1]

            idx += 1

            # Feed this variables to the network
            if self.args.random_cropping:
                feed_dict = {self.test_model.x_pl_before: x_batch,
                             self.test_model.y_pl_before: y_batch,
                             self.test_model.is_training: False,
                             }
            else:
                feed_dict = {self.test_model.x_pl: x_batch,
                             self.test_model.y_pl: y_batch,
                             self.test_model.is_training: False
                             }

            # run the feed_forward; the segmented image summary is only fetched
            # outside of the 'test_v2' data mode
            if self.args.data_mode == 'test_v2':
                out_argmax, acc = self.sess.run(
                    [self.test_model.out_argmax, self.test_model.accuracy],
                    feed_dict=feed_dict)
            else:
                out_argmax, acc, segmented_imgs = self.sess.run(
                    [self.test_model.out_argmax, self.test_model.accuracy,
                     # self.test_model.merged_summaries, self.test_model.segmented_summary],
                     self.test_model.segmented_summary],
                    feed_dict=feed_dict)

            if self.args.data_mode == 'test_v2':
                # resize the prediction to the size of the large labels so that
                # the metrics are computed against y_batch_large
                yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]), dtype=np.uint32)
                out_argmax = np.asarray(out_argmax, dtype=np.uint8)
                for y in range(out_argmax.shape[0]):
                    yy[y, ...] = misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest')
                y_batch = y_batch_large
                out_argmax = yy

            if pkl:
                out_argmax[0] = self.linknet_postprocess(out_argmax[0])
                segmented_imgs = decode_labels(out_argmax, 20)

            # print('mean preds ', out_argmax.mean())
            # np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0])
            if self.args.data_mode == 'test':
                plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0])

            # log loss and acc
            acc_list += [acc]

            # log metrics
            if self.args.random_cropping:
                # split the label map at column 512 and stack both parts along
                # a new leading axis before updating the metrics
                y1 = np.expand_dims(y_batch[0, :, :512], axis=0)
                y2 = np.expand_dims(y_batch[0, :, 512:], axis=0)
                y_batch = np.concatenate((y1, y2), axis=0)
                self.metrics.update_metrics(out_argmax, y_batch, 0, 0)
            else:
                self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0)

        # mean over batches (no loss is computed here, total_loss stays 0)
        total_loss = 0
        total_acc = np.mean(acc_list)
        mean_iou = self.metrics.compute_final_metrics(self.test_data_len)

        # print in console
        tt.close()
        print("Here the statistics")
        print("Total_loss: " + str(total_loss))
        print("Total_acc: " + str(total_acc)[:6])
        print("mean_iou: " + str(mean_iou))

        print("Plotting imgs")
        for i in range(len(img_list)):
            plt.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i])

    def test_eval(self, pkl=False):
        """
        Run the test set through the network and write the predictions to disk.

        For every test image two files named after the matching entry of
        self.names_mapper['Y'] are written below self.args.out_dir: the
        colored segmentation under 'imgs/' and, under 'results/', the
        postprocess()-ed argmax map resized to [1024, 2048] with 'nearest'
        interpolation. Missing target directories are created.

        :param pkl: when False the best checkpoint is loaded first; when True
                    no checkpoint is loaded and each prediction is passed
                    through linknet_postprocess() and re-colored with
                    decode_labels()
        :return:
        """
        print("Testing mode will begin NOW..")

        # load the best model checkpoint to test on it
        if not pkl:
            self.load_best_model()

        def ensure_parent_dir(path):
            # create the directory that will contain `path` if it is missing
            parent = os.path.dirname(path)
            if not os.path.exists(parent):
                os.makedirs(parent)

        progress = tqdm(range(self.test_data_len))

        # one image per step; the loop value doubles as the image index
        for sample_idx in progress:
            x_batch = self.test_data['X'][sample_idx:sample_idx + 1]

            # pick the input placeholder that matches the cropping setup
            if self.args.random_cropping:
                input_pl = self.test_model.x_pl_before
            else:
                input_pl = self.test_model.x_pl
            feed_dict = {input_pl: x_batch,
                         self.test_model.is_training: False}

            # run the feed_forward
            out_argmax, segmented_imgs = self.sess.run(
                [self.test_model.out_argmax, self.test_model.segmented_summary],
                feed_dict=feed_dict)

            if pkl:
                out_argmax[0] = self.linknet_postprocess(out_argmax[0])
                segmented_imgs = decode_labels(out_argmax, 20)

            relative_name = str(self.names_mapper['Y'][sample_idx])

            # Colored results for visualization
            colored_save_path = self.args.out_dir + 'imgs/' + relative_name
            ensure_parent_dir(colored_save_path)
            plt.imsave(colored_save_path, segmented_imgs[0])

            # Results for official evaluation
            save_path = self.args.out_dir + 'results/' + relative_name
            ensure_parent_dir(save_path)
            output = postprocess(out_argmax[0])
            misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest'))

        # print in console
        progress.close()

    def test_inference(self):
        """
        Benchmark variant of the test loop: it times each forward pass and
        prints frame-per-second statistics instead of saving predictions.

        NOTE(review): after restoring the best checkpoint the method writes
        the session graph to ``./graph.pb`` and calls ``exit(1)``, which ends
        the process. The timing loop after that call is therefore never
        executed as the code stands.
        """
        print("INFERENCE mode will begin NOW..")

        # restore the best checkpoint before doing anything else
        self.load_best_model()

        # node names noted by the original author for the exported graph:
        #   output_node: network/output/Argmax
        #   input_node:  network/input/Placeholder
        print("Saving graph...")
        tf.train.write_graph(self.sess.graph_def, ".", 'graph.pb')
        print("Graph saved successfully.\n\n")
        exit(1)

        # progress bar over the test images, the loop variable is the index
        progress = tqdm(range(self.test_data_len))

        # accumulates the duration of every forward pass
        fps_meter = FPSMeter()

        for idx in progress:
            # single-image mini batch
            window = slice(idx, idx + 1)
            x_batch = self.test_data['X'][window]
            y_batch = self.test_data['Y'][window]

            # is_training is not part of this feed
            if self.args.random_cropping:
                feed_dict = {self.test_model.x_pl_before: x_batch,
                             self.test_model.y_pl_before: y_batch}
            else:
                feed_dict = {self.test_model.x_pl: x_batch,
                             self.test_model.y_pl: y_batch}

            # time exactly one forward pass
            tic = time.time()
            self.sess.run([self.test_model.out_argmax], feed_dict=feed_dict)
            fps_meter.update(time.time() - tic)

        fps_meter.print_statistics()

    def finalize(self):
        """
        Wrap up a run: finalize the reporter, close the summary writer and
        save the model, in that order.
        """
        self.reporter.finalize()
        self.summary_writer.close()
        self.save_model()

    def debug_layers(self):
        """
        Evaluate every tensor stored in the 'debug_layers' collection on the
        debug sample and dump the results to
        ``out_networks_layers/out_linknet_layers.pkl``.

        Afterwards the prediction for the same sample is computed, saved as
        ``imgs/debug.png`` under ``self.args.out_dir`` and scored through
        ``self.metrics``; the resulting value is printed.

        Side effect: ``self.debug_y`` is overwritten with the output of
        ``linknet_preprocess_gt``.

        :return: None
        """
        print("Debugging mode will begin NOW..")

        # list what is about to be evaluated
        layers = tf.get_collection('debug_layers')
        print("ALL Layers in the collection that i wanna to run {} layer".format(len(layers)))
        for tensor in layers:
            print(tensor)

        # start from clean metrics
        self.metrics.reset()

        print('mean image ', self.debug_x.mean())
        print('mean gt ', self.debug_y.mean())

        # the ground truth is preprocessed in place before it is fed
        self.debug_y = self.linknet_preprocess_gt(self.debug_y)

        feed_dict = {
            self.test_model.x_pl: self.debug_x,
            self.test_model.y_pl: self.debug_y,
            self.test_model.is_training: False,
        }

        # first pass: outputs of all the collected layers
        out_layers = self.sess.run(layers, feed_dict=feed_dict)
        for activation in out_layers:
            print(activation.shape)

        # protocol 2 is kept so the dump stays readable by older readers
        with open("out_networks_layers/out_linknet_layers.pkl", "wb") as dump_file:
            pickle.dump(out_layers, dump_file, protocol=2)

        # second pass: argmax prediction and its colored rendering
        fetches = [self.test_model.out_argmax,
                   self.test_model.segmented_summary]
        out_argmax, segmented_imgs = self.sess.run(fetches,
                                                   feed_dict=feed_dict)

        print('mean preds ', out_argmax[0].mean())

        plt.imsave(self.args.out_dir + 'imgs/' + 'debug.png', segmented_imgs[0])

        # score the single debug sample
        self.metrics.update_metrics(out_argmax[0], self.debug_y, 0, 0)
        mean_iou = self.metrics.compute_final_metrics(1)

        print("mean_iou_of_debug: " + str(mean_iou))
Пример #22
0
def _collect_all_tokens(api, session_key, event_survey_pairs):
    """Return the tokens of all configured LimeSurvey surveys.

    One tuple per participant is returned, laid out as
    (firstname, event_oid, sid, token, completed).
    """
    all_tokens = []
    for event_oid, sid in event_survey_pairs.items():
        participants_req = api.tokens.list_participants(session_key, sid)
        participants = participants_req.get('result')
        for participant in participants:
            # only real participants are collected; a 'status' entry is
            # what the result holds when there are none
            if participant != 'status':
                p_info = participant.get('participant_info')
                all_tokens.append((p_info.get('firstname'), event_oid, sid,
                                   participant.get('token'),
                                   participant.get('completed')))
    return all_tokens


def cycle_through_syncs():
    """Keep OpenClinica, the local database and LimeSurvey in sync.

    Until the configured 'loop_this_long' duration (h:m:s) has passed, each
    cycle:

    1. adds study subjects reported by the OpenClinica web service that are
       not yet in the subjects table;
    2. creates a LimeSurvey token for every subject/event pair that maps to
       a survey and has no token yet;
    3. builds a per-subject overview of tokens and their completion state
       and imports it into OpenClinica when it differs from what is stored
       in the database;
    4. sleeps 'sleep_this_long' seconds.

    When the loop ends the report file is closed and mailed.
    """
    my_report = Reporter()
    start_time = datetime.datetime.now()
    my_report.append_to_report('cycle started at ' + str(start_time))
    # read configuration file for usernames and passwords and other parameters
    config = readDictFile('oli.config')
    # read the combinations of StudyEventOIDs and LimeSurvey sids
    event_survey_pairs = readDictFile('event_survey_pairs')

    # initialise the oc-webservice
    myWebService = studySubjectWS(config['userName'], config['password'],
                                  config['baseUrl'])
    myDataWS = dataWS(config['userName'], config['password'],
                      config['baseUrl'])

    # create a connection to the postgresql database
    conn = ConnToOliDB()
    my_report.append_to_report(conn.init_result)

    def _import_if_changed(study_subject_id, ls_data, marker):
        # Import ls_data for one subject unless the database already holds
        # exactly this text; marker only tags the report line.
        # NOTE(review): the where clauses are built by plain string
        # interpolation of values that originate from LimeSurvey; confirm
        # that DLookup cannot be abused through a crafted first name.
        ssoid = conn.DLookup("study_subject_oid", "subjects",
                             "study_subject_id='%s'" % (study_subject_id))
        ls_data_in_db = conn.DLookup("ls_data", "subjects",
                                     "study_subject_oid='%s'" % (ssoid))
        if ls_data != ls_data_in_db:
            myImport = myDataWS.importLSData(ssoid, ls_data)
            conn.WriteLSDataToDB(ssoid, ls_data, myImport)
            my_report.append_to_report('wrote ls_data for subject %s %s' %
                                       (ssoid, marker))

    while True:
        # retrieve all StudySubjectEvents, using the webservice
        allStudySubjectEvents = myWebService.getListStudySubjectEvents(
            config['studyIdentifier'])

        # retrieve the subjects, using the connection to the postgresql database
        subjects_in_db = conn.ReadSubjectsFromDB()

        for studysubject_event in allStudySubjectEvents:
            # a subject is known when its StudySubjectID matches column 1
            # of one of the database rows
            already_known = any(studysubject_event[0] == subject_in_db[1]
                                for subject_in_db in subjects_in_db)
            if not already_known:
                myPgSubject = PGSubject(studysubject_event[0])
                conn.AddSubjectsToDB([(myPgSubject.GetSSOID(),
                                       studysubject_event[0])])
                my_report.append_to_report('added %s to database' %
                                           studysubject_event[0])

        # now all StudySubjects in OpenClinica are also in our postgresql-database
        # so we refresh our list
        subjects_in_db = conn.ReadSubjectsFromDB()

        # collecting LimeSurvey data
        # Make a session, which is a bit of overhead, but the script will be
        # running for hours.
        api = LimeSurveyRemoteControl2API(config['lsUrl'])
        session_req = api.sessions.get_session_key(config['lsUser'],
                                                   config['lsPassword'])
        session_key = session_req.get('result')

        # all tokens of all surveys, so we can check if a new token is needed
        all_tokens = _collect_all_tokens(api, session_key, event_survey_pairs)

        for studysubject_event in allStudySubjectEvents:
            subject_id = studysubject_event[0]
            event_oid = studysubject_event[1]
            # only events that are paired with a survey need a token
            if event_oid not in event_survey_pairs:
                continue
            token_exists = any(
                one_token[0] == subject_id and one_token[1] == event_oid
                for one_token in all_tokens)
            if token_exists:
                continue

            survey_id = event_survey_pairs[event_oid]
            print("add token for " + subject_id + " " + str(survey_id) +
                  ", " + event_oid)
            participant_data = {'firstname': subject_id}
            api.tokens.add_participants(session_key, survey_id,
                                        participant_data)
            # report the survey the token was actually created for
            my_report.append_to_report(
                'created token for survey %s for subject %s' %
                (survey_id, subject_id))

        # we may have added tokens, so refresh all_tokens
        all_tokens = _collect_all_tokens(api, session_key, event_survey_pairs)

        # now import the LimeSurvey results into OpenClinica
        # sorted by study subject id, so the tokens of one subject are adjacent
        sorted_tokens = sorted(all_tokens, key=itemgetter(0))
        # 'x' is the start value meaning "no subject collected yet"
        last_ssid = 'x'
        lime_survey_header = 'ev. token  completed&#10;---------------------------&#10;'
        lime_survey_data_to_import = lime_survey_header
        for token in sorted_tokens:
            survey_friendly_name = conn.DLookup("friendly_name", "ls_sids",
                                                "ls_sid=%d" % (int(token[2])))
            token_line = (survey_friendly_name + ' ' + token[3] + ' ' +
                          token[4] + '&#10;')
            if last_ssid != token[0]:
                # new study subject ID, so write the previous one,
                # except when there is no previous one yet
                if last_ssid != 'x':
                    _import_if_changed(last_ssid, lime_survey_data_to_import,
                                       1)

                # start collecting for the new subject
                last_ssid = token[0]
                lime_survey_data_to_import = lime_survey_header + token_line
            else:
                lime_survey_data_to_import = (lime_survey_data_to_import +
                                              token_line)

        # write the last subject; without any token there is nothing to
        # write and the start value must not be looked up as a subject
        if last_ssid != 'x':
            _import_if_changed(last_ssid, lime_survey_data_to_import, 2)

        # some book keeping to check if we must continue looping, or break the loop
        # first sleep a bit, so we do not eat up all CPU
        time.sleep(int(config['sleep_this_long']))
        current_time = datetime.datetime.now()
        difference = current_time - start_time
        loop_this_long = config['loop_this_long']
        max_diff_list = loop_this_long.split(sep=':')
        max_difference = datetime.timedelta(hours=int(max_diff_list[0]),
                                            minutes=int(max_diff_list[1]),
                                            seconds=int(max_diff_list[2]))
        if difference > max_difference:
            break

    my_report.append_to_report('finished looping from %s till %s.' %
                               (start_time, current_time))
    # close the file so we can send it
    my_report.close_file()
    MailThisLogFile('logs/report.txt')
Пример #23
0
    def __init__(self, args, sess, model):
        """
        Prepare everything a run needs: variables, savers, summaries,
        data-size bookkeeping, metrics and the reporter.

        :param args: run configuration; the attributes read here are
                     max_to_keep, summary_dir, mode, tfrecord_train_len,
                     tfrecord_val_len, batch_size, num_classes and out_dir
        :param sess: session whose graph is handed to the summary writer
        :param model: model object; its ``params`` attribute is kept as
                      ``self.params``
        """
        print("\nTraining is initializing itself\n")

        self.args = args
        self.sess = sess
        self.model = model

        # shortcut for model params
        self.params = self.model.params

        # To initialize all variables
        self.init = None
        self.init_model()

        # Create a saver object
        self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep,
                                    keep_checkpoint_every_n_hours=10,
                                    save_relative_paths=True)

        # separate saver that keeps a single checkpoint
        self.saver_best = tf.train.Saver(max_to_keep=1,
                                         save_relative_paths=True)

        # Load from latest checkpoint if found
        self.load_model()

        ##################################################################################
        # Init summaries

        # Summary variables
        self.scalar_summary_tags = [
            'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch',
            'train-acc-per-epoch', 'val-acc-per-epoch'
        ]
        # image summaries are twice the image width
        self.images_summary_tags = [
            ('train_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3])
        ]

        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and their operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir,
                                                    self.sess.graph)
        ##################################################################################
        if self.args.mode == 'train':
            self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size
            self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size
        else:
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_test_data()
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class: test runs get their own report file, every
        # other mode (train, overfit, ...) keeps writing the training report
        # so that a reporter always exists
        if self.args.mode == 'test':
            report_file = 'report_test.json'
        else:
            report_file = 'report_train.json'
        self.reporter = Reporter(self.args.out_dir + report_file, self.args)
Пример #24
0
#!/usr/bin/env python3

from utils.reporter import Reporter

# Script entry point: build a Reporter and let it report a release failure.
if __name__ == '__main__':
    reporter = Reporter()
    reporter.report_release_failure()
Пример #25
0
    def setup(self,
              env,
              agent,
              max_training_episodes,
              max_test_episodes,
              max_steps_training,
              max_steps_testing,
              save_path=os.path.expanduser("~") + os.path.sep +
              "urnai_saved_traingings",
              file_name=None,
              enable_save=True,
              save_every=10,
              relative_path=False,
              debug_level=0,
              reset_epsilon=False,
              tensorboard_logging=False,
              log_actions=True,
              episode_batch_avg_calculation=10,
              do_reward_test=False,
              reward_test_number_of_episodes=10,
              rolling_avg_window_size=20):
        """
        Store the training configuration, create the logger and either load
        an existing training from disk or prepare a new one.

        :param env: environment, stored as ``self.env``
        :param agent: agent, stored as ``self.agent``; its model, action
                      wrapper, state builder and reward builder are
                      described to the logger
        :param max_training_episodes: stored as is
        :param max_test_episodes: stored as is
        :param max_steps_training: stored as is
        :param max_steps_testing: stored as is
        :param save_path: directory that contains the training folders
        :param file_name: name of this training's folder inside save_path;
                          when omitted (None) a timestamp taken at call time
                          is used, with spaces, colons and dots replaced by
                          underscores
        :param enable_save: when True an existing training found at the full
                            save path is loaded and the persistence
                            directories are created; when False nothing is
                            loaded or created
        :param save_every: stored as is
        :param relative_path: when True save_path is taken relative to the
                              directory two levels above the one holding
                              this source file
        :param debug_level: assigned to ``rp.VERBOSITY_LEVEL``
        :param reset_epsilon: when True the epsilon, gamma and learning rate
                              attributes are added to the model's pickle
                              black list
        :param tensorboard_logging: when True a TensorBoard callback writing
                                    to "<full save path>/tf_logs" is set on
                                    the model
        :param log_actions: forwarded to the logger and to
                            ``make_persistance_dirs``
        :param episode_batch_avg_calculation: forwarded to the logger
        :param do_reward_test: stored as is
        :param reward_test_number_of_episodes: stored as is
        :param rolling_avg_window_size: forwarded to the logger
        """
        # The timestamp has to be taken per call: as a default argument it
        # would be evaluated only once, when the method is defined, and
        # every default-named training of the process would share a folder.
        if file_name is None:
            file_name = str(datetime.now()).replace(" ", "_").replace(
                ":", "_").replace(".", "_")

        self.versioner = Versioner()
        self.env = env
        self.agent = agent
        self.save_path = save_path
        self.file_name = file_name
        self.enable_save = enable_save
        self.save_every = save_every
        self.relative_path = relative_path
        self.reset_epsilon = reset_epsilon
        self.max_training_episodes = max_training_episodes
        self.max_test_episodes = max_test_episodes
        self.max_steps_training = max_steps_training
        self.max_steps_testing = max_steps_testing
        self.curr_training_episodes = -1
        self.curr_playing_episodes = -1
        rp.VERBOSITY_LEVEL = debug_level
        self.tensorboard_logging = tensorboard_logging
        self.log_actions = log_actions
        self.episode_batch_avg_calculation = episode_batch_avg_calculation
        self.do_reward_test = do_reward_test
        self.reward_test_number_of_episodes = reward_test_number_of_episodes
        self.rolling_avg_window_size = rolling_avg_window_size
        self.inside_training_test_loggers = []

        self.logger = Logger(
            0,
            self.agent.__class__.__name__,
            self.agent.model.__class__.__name__,
            self.agent.model,
            self.agent.action_wrapper.__class__.__name__,
            self.agent.action_wrapper.get_action_space_dim(),
            self.agent.action_wrapper.get_named_actions(),
            self.agent.state_builder.__class__.__name__,
            self.agent.reward_builder.__class__.__name__,
            self.env.__class__.__name__,
            log_actions=self.log_actions,
            episode_batch_avg_calculation=self.episode_batch_avg_calculation,
            rolling_avg_window_size=self.rolling_avg_window_size)

        # Adding epsilon, learning rate and gamma factors to our pickle black list,
        # so that they are not loaded when loading the model's weights.
        # Making it so that the current training session acts as a brand new training session
        # (except for the fact that the model's weights may already be somewhat optimized from previous trainings)
        if self.reset_epsilon:
            self.agent.model.pickle_black_list.extend([
                "epsilon_greedy",
                "epsilon_decay_rate",
                "epsilon_min",
                "gamma",
                "learning_rate",
                "learning_rate_min",
                "learning_rate_decay",
                "learning_rate_decay_ep_cutoff",
            ])

        # directory two levels above the one that holds this source file
        currentdir = os.path.dirname(
            os.path.abspath(inspect.getfile(inspect.currentframe())))
        parentdir = os.path.dirname(currentdir)
        parentdir = os.path.dirname(parentdir)
        if relative_path:
            self.full_save_path = parentdir + os.path.sep + self.save_path + os.path.sep + self.file_name
        else:
            self.full_save_path = self.save_path + os.path.sep + self.file_name

        self.full_save_play_path = self.full_save_path + os.path.sep + "play_files"

        if self.enable_save and os.path.exists(self.full_save_path):
            # resume: load what is on disk and check its version
            rp.report("WARNING! Loading training from " + self.full_save_path +
                      " with SAVING ENABLED.")
            self.load(self.full_save_path)
            self.versioner.ask_for_continue()
            self.make_persistance_dirs(self.log_actions)
        elif self.enable_save:
            rp.report("WARNING! Starting new training on " +
                      self.full_save_path + " with SAVING ENABLED.")
            self.make_persistance_dirs(self.log_actions)
        else:
            rp.report(
                "WARNING! Starting new training WITHOUT SAVING PROGRESS.")

        if self.tensorboard_logging:
            logdir = self.full_save_path + "/tf_logs"
            self.agent.model.tensorboard_callback = [
                tf.keras.callbacks.TensorBoard(log_dir=logdir)
            ]
Пример #26
0
 def save_extra(self, save_path):
     """
     Save the collaborating objects to save_path: environment, agent,
     logger and versioner, then call ``rp.save`` with the same path.
     """
     self.env.save(save_path)
     self.agent.save(save_path)
     self.logger.save(save_path)
     self.versioner.save(save_path)
     rp.save(save_path)
Пример #27
0
 def load_extra(self, save_path):
     """
     Load the collaborating objects from save_path: agent, environment,
     logger and versioner, then call ``rp.load`` with the same path.
     """
     self.agent.load(save_path)
     self.env.load(save_path)
     self.logger.load(save_path)
     self.versioner.load(save_path)
     rp.load(save_path)
Пример #28
0
#!/usr/bin/env python3

from utils.reporter import Reporter

# Script entry point: build a Reporter and let it report a test failure.
if __name__ == '__main__':
    reporter = Reporter()
    reporter.report_test_failure()
Пример #29
0
def cycle_through_syncs():
    """Import LimeSurvey responses into OpenClinica until time is up.

    Until the configured 'loop_this_long' duration (h:m:s) has passed, each
    cycle reads the survey responses and, per response:

    1. registers the sid/response-id combination in the database;
    2. derives the study subject id from the ChildCode and stores it;
    3. looks up (or fetches and stores) the study subject oid;
    4. if the response has no completion date yet, composes the ODM request
       and imports it, recording request, response and outcome.

    Every problem is written to the report as an 'ERROR:' line and the
    response is skipped. When the loop ends the report file is closed and
    mailed.
    """
    my_report = Reporter()

    start_time = datetime.datetime.now()
    my_report.append_to_report('INFO: cycle started at ' + str(start_time))
    # read configuration file for usernames and passwords and other parameters
    config = readDictFile('oli.config')
    # set from this config the sid, because it is used everywhere
    sid = int(config['sid'])

    # create a connection to the postgresql database
    conn = ConnToOliDB()
    my_report.append_to_report(conn.init_result)

    # initialize the oc-webservice
    myDataWS = dataWS(config['userName'], config['password'], config['baseUrl'])

    # start the cycling here
    while True:
        # get the responses as a list
        responses_list = read_ls_responses(config)
        # process the responses one by one
        for one_response in responses_list:
            # get the response_id, for administrative purposes
            response_id = one_response['id']
            # check if this combination sid-response-id already exists and if not, add it
            conn.TryToAddSubjectToDB(sid, response_id)

            # without a child code there is nothing we can do with the data
            if one_response['ChildCode'] is None:
                my_report.append_to_report('ERROR: Missing ChildCode for resp.id. %i' % response_id)
                continue

            # add leading zeros and the study prefix
            child_code = int(float(one_response['ChildCode']))
            study_subject_id = config['childcode_prefix'] + ('0000' + str(child_code))[-8:]
            if len(study_subject_id) != 13:
                my_report.append_to_report('ERROR: Incorrect ChildCode for resp.id. %i: %i' % (response_id, child_code))
                continue

            # where clause selecting the row of this response
            this_response = 'sid=%i and response_id=%i' % (sid, response_id)

            # write the child-code / study subject id to the database
            if conn.DLookup('study_subject_id', 'ls_responses', this_response) is None:
                conn.WriteStudySubjectID(sid, response_id, study_subject_id)

            # check if we already have a valid study subject oid
            study_subject_oid = conn.DLookup('study_subject_oid', 'ls_responses', this_response)
            if study_subject_oid is None or study_subject_oid == 'None':
                # try to get a valid study subject oid
                study_subject_oid = PGSubject(study_subject_id).GetSSOID()
                # we don't know if we now have study_subject_oid,
                # but the procedure only writes the study subject oid to the database for later use
                # if it is not null
                conn.WriteStudySubjectOID(sid, response_id, study_subject_oid)

            # only continue if we have both study subject id and study subject oid
            if study_subject_oid is None:
                my_report.append_to_report('ERROR: missing OID for resp.id. %i : ChildCode %s' % (response_id, study_subject_id))
                continue

            # a completion date means this response was imported before
            if conn.DLookup('date_completed', 'ls_responses', this_response) is not None:
                continue

            print('resp.id. %i' % response_id)
            # we try to compose the request, but if we can't convert an item to the correct data type
            # then we put that in the report
            ws_request = compose_odm(study_subject_oid, one_response)
            error_starts_at = ws_request.find('CONVERSION-ERROR')
            if error_starts_at != -1:
                my_report.append_to_report('ERROR: conversion for resp.id. %i %s failed with message "%s" and more' % (response_id, study_subject_id, ws_request[error_starts_at:error_starts_at + 100]))
                continue

            conn.WriteDataWSRequest(sid, response_id, ws_request)
            import_result = myDataWS.importData(ws_request)
            import_result = import_result.replace("'", "")
            conn.WriteDataWSResponse(sid, response_id, import_result)
            if import_result.startswith('Success'):
                my_report.append_to_report('INFO: Successfully imported data for %s (%s)' % (study_subject_id, study_subject_oid))
                conn.SetResponseComplete(sid, response_id)
            else:
                # find() gives -1 when 'I_' is absent; slicing from -1 would
                # keep only the last character, so report the whole message
                item_starts_at = max(import_result.find('I_'), 0)
                my_report.append_to_report('ERROR: import for resp.id %i %s failed with message "%s" and more' % (response_id, study_subject_id, import_result[item_starts_at:]))

        # check if we must continue looping, or break the loop
        # first sleep a bit, so we do not eat up all CPU
        time.sleep(int(config['sleep_this_long']))
        current_time = datetime.datetime.now()
        difference = current_time - start_time
        loop_this_long = config['loop_this_long']
        max_diff_list = loop_this_long.split(sep=':')
        max_difference = datetime.timedelta(hours=int(max_diff_list[0]), minutes=int(max_diff_list[1]), seconds=int(max_diff_list[2]))
        if difference > max_difference:
            break

    my_report.append_to_report('INFO: finished looping from %s till %s.' % (start_time, current_time))
    # close the file so we can send it
    my_report.close_file()
    MailThisLogFile('logs/report.txt')
Пример #30
0
class NewTrain(object):
    def __init__(self, args, sess, model):
        """Wire up savers, summaries, metrics and the reporter for a run.

        Initializes the graph variables, restores the latest checkpoint when
        one exists, builds the per-epoch summary ops and opens the summary
        writer.

        :param args: run configuration. Read directly here: ``mode``,
            ``max_to_keep``, ``summary_dir``, ``num_classes``, ``out_dir``
            and, when ``mode == 'train'``, ``tfrecord_train_len``,
            ``tfrecord_val_len`` and ``batch_size``.
        :param sess: session the graph is initialized, restored and run in.
        :param model: model object; must provide ``params`` with
            ``img_height`` and ``img_width``.
        """
        print("\nTraining is initializing itself\n")

        self.args = args
        self.sess = sess
        self.model = model

        # shortcut for model params
        self.params = self.model.params

        # To initialize all variables
        self.init = None
        self.init_model()

        # Regular saver: rolling window of checkpoints plus one kept every
        # 10 hours.
        self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep,
                                    keep_checkpoint_every_n_hours=10,
                                    save_relative_paths=True)

        # Separate saver that only ever keeps a single (best) checkpoint.
        self.saver_best = tf.train.Saver(max_to_keep=1,
                                         save_relative_paths=True)

        # Load from latest checkpoint if found
        self.load_model()

        ##################################################################################
        # Init summaries

        # Tags of the scalar summaries written once per epoch
        self.scalar_summary_tags = [
            'mean_iou_on_val', 'train-loss-per-epoch', 'val-loss-per-epoch',
            'train-acc-per-epoch', 'val-acc-per-epoch'
        ]
        # (tag, placeholder shape) pairs of the image summaries.
        # NOTE(review): the width is doubled, presumably because each sample
        # shows two images side by side -- confirm against the model's
        # segmented_summary.
        self.images_summary_tags = [
            ('train_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3]),
            ('val_prediction_sample',
             [None, self.params.img_height, self.params.img_width * 2, 3])
        ]

        self.summary_tags = []
        self.summary_placeholders = {}
        self.summary_ops = {}
        # init summaries and their operators
        self.init_summaries()
        # Create summary writer
        self.summary_writer = tf.summary.FileWriter(self.args.summary_dir,
                                                    self.sess.graph)
        ##################################################################################
        if self.args.mode == 'train':
            # Whole batches only; a trailing partial batch is not counted.
            self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size
            self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size
        else:
            # Every mode other than 'train' loads the in-memory test arrays.
            self.test_data = None
            self.test_data_len = None
            self.num_iterations_testing_per_epoch = None
            self.load_test_data()
        ##################################################################################
        # Init metrics class
        self.metrics = Metrics(self.args.num_classes)
        # Init reporter class.
        # The previous condition (mode == 'train' or 'overfit') was always
        # truthy, so test runs also wrote into report_train.json. Only 'test'
        # gets the test report; every other mode keeps the training report so
        # self.reporter is always defined.
        if self.args.mode == 'test':
            report_name = 'report_test.json'
        else:
            report_name = 'report_train.json'
        self.reporter = Reporter(self.args.out_dir + report_name, self.args)
        ##################################################################################

    @timeit
    def load_test_data(self):
        """Load the arrays used for testing and size the test loop.

        Reads ``X_val.npy`` and ``Y_val.npy`` from ``self.args.data_dir`` into
        ``self.test_data`` and derives ``self.test_data_len`` (the sample
        count rounded down to a multiple of the batch size) and
        ``self.num_iterations_testing_per_epoch``.
        """
        print("Loading Testing data..")
        data_dir = self.args.data_dir
        inputs = np.load(data_dir + "X_val.npy")
        labels = np.load(data_dir + "Y_val.npy")
        self.test_data = {'X': inputs, 'Y': labels}

        # Ignore the samples that would only fill a partial batch.
        sample_count = inputs.shape[0]
        self.test_data_len = sample_count - sample_count % self.args.batch_size

        print("Test-shape-x -- " + str(inputs.shape))
        print("Test-shape-y -- " + str(labels.shape))

        # Ceiling division of the usable sample count by the batch size.
        batch_size = self.args.batch_size
        self.num_iterations_testing_per_epoch = (
            self.test_data_len + batch_size - 1) // batch_size
        print("Test data is loaded")

    @timeit
    def init_model(self):
        """Run the global and local variable initializers in the session.

        The grouped initializer op is kept on ``self.init``.
        """
        print("Initializing the variables of the model")
        global_init = tf.global_variables_initializer()
        local_init = tf.local_variables_initializer()
        # Group both initializers so one run call covers them.
        self.init = tf.group(global_init, local_init)
        self.sess.run(self.init)
        print("Initialization finished")

    def save_model(self):
        """
        Write a checkpoint of the session with the regular saver.

        The checkpoint goes to ``self.args.checkpoint_dir`` and is tagged
        with the model's global step tensor.
        :return:
        """
        print("saving a checkpoint")
        step_tensor = self.model.global_step_tensor
        self.saver.save(self.sess,
                        self.args.checkpoint_dir,
                        global_step=step_tensor)
        print("Saved a checkpoint")

    def save_best_model(self):
        """
        Write a checkpoint of the session with the best-model saver.

        The checkpoint goes to ``self.args.checkpoint_best_dir`` and is
        tagged with the model's global step tensor.
        :return:
        """
        print("saving a checkpoint for the best model")
        step_tensor = self.model.global_step_tensor
        self.saver_best.save(self.sess,
                             self.args.checkpoint_best_dir,
                             global_step=step_tensor)
        print("Saved a checkpoint for the best model")

    def load_best_model(self):
        """
        Restore the best-model checkpoint into the session.

        Looks up the newest checkpoint under ``self.args.checkpoint_best_dir``
        and restores it with ``self.saver_best``.
        :return:
        :raises SystemExit: with status -1 when no checkpoint is found.
        """
        print("loading a checkpoint for BEST ONE")
        latest_checkpoint = tf.train.latest_checkpoint(
            self.args.checkpoint_best_dir)
        if not latest_checkpoint:
            print("ERROR NO best checkpoint found")
            # Raise SystemExit directly instead of calling exit(): that
            # helper is injected by the site module for interactive use and
            # is not guaranteed to exist (e.g. under ``python -S``).
            raise SystemExit(-1)

        print(
            "Loading model checkpoint {} ...\n".format(latest_checkpoint))
        self.saver_best.restore(self.sess, latest_checkpoint)
        print("BEST MODEL LOADED..")

    def init_summaries(self):
        """
        Create the summary part of the graph.

        For every tag in ``self.scalar_summary_tags`` and every
        ``(tag, shape)`` pair in ``self.images_summary_tags`` this records the
        tag in ``self.summary_tags``, creates a float32 placeholder in
        ``self.summary_placeholders`` and the matching summary op in
        ``self.summary_ops``.
        :return:
        """
        with tf.variable_scope('train-summary-per-epoch'):
            for tag in self.scalar_summary_tags:
                # append, not ``+=``: adding a str to a list in place would
                # extend it with the tag's individual characters.
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32',
                                                                None,
                                                                name=tag)
                self.summary_ops[tag] = tf.summary.scalar(
                    tag, self.summary_placeholders[tag])
            for tag, shape in self.images_summary_tags:
                self.summary_tags.append(tag)
                self.summary_placeholders[tag] = tf.placeholder('float32',
                                                                shape,
                                                                name=tag)
                # At most 10 images are written per summary.
                self.summary_ops[tag] = tf.summary.image(
                    tag, self.summary_placeholders[tag], max_outputs=10)

    def add_summary(self, step, summaries_dict=None, summaries_merged=None):
        """
        Write summaries through the summary writer
        :param step: step value the summaries are recorded at
        :param summaries_dict: optional mapping of summary tag to value; each
            value is fed through that tag's placeholder and the resulting
            summary is written
        :param summaries_merged: optional already evaluated summary, written
            as is
        :return:
        """
        if summaries_dict is not None:
            # Pair every requested tag with its op and its placeholder feed.
            fetches = []
            feed = {}
            for tag, value in summaries_dict.items():
                fetches.append(self.summary_ops[tag])
                feed[self.summary_placeholders[tag]] = value
            for evaluated in self.sess.run(fetches, feed):
                self.summary_writer.add_summary(evaluated, step)
        if summaries_merged is not None:
            self.summary_writer.add_summary(summaries_merged, step)

    @timeit
    def load_model(self):
        """
        Restore the most recent checkpoint from ``self.args.checkpoint_dir``,
        if there is one
        :return:
        """
        # Pretrained encoder weights that could not be loaded during
        # initialization are loaded here. An AttributeError (for example a
        # model without this hook) is deliberately ignored.
        try:
            self.model.encoder.load_pretrained_weights(self.sess)
        except AttributeError:
            pass

        print("Searching for a checkpoint")
        ckpt_path = tf.train.latest_checkpoint(self.args.checkpoint_dir)
        if not ckpt_path:
            print("\n.. No ckpt, SO First time to train :D ..\n")
            return

        print("Loading model checkpoint {} ...\n".format(ckpt_path))
        self.saver.restore(self.sess, ckpt_path)
        print("Model loaded from the latest checkpoint\n")

    def train(self):
        """Run the training loop up to ``self.args.num_epochs``.

        Epochs start one past the value stored in the model's global epoch
        tensor. Every iteration runs the train op and writes the merged
        summaries; the last iteration of an epoch additionally writes the
        per-epoch loss/accuracy/sample summaries, reports them and advances
        the stored epoch. Checkpointing, validation and learning-rate decay
        happen every ``save_every`` / ``test_every`` /
        ``learning_decay_every`` epochs.
        """
        print("Training mode will begin NOW ..")
        tf.train.start_queue_runners(sess=self.sess)
        curr_lr = self.model.args.learning_rate

        first_epoch = self.model.global_epoch_tensor.eval(self.sess) + 1
        last_epoch = self.args.num_epochs
        for cur_epoch in range(first_epoch, last_epoch + 1):
            steps_per_epoch = self.num_iterations_training_per_epoch
            epoch_tag = 'epoch-' + str(cur_epoch)

            # progress bar over the iterations of this epoch
            tt = tqdm(range(steps_per_epoch),
                      total=steps_per_epoch,
                      desc=epoch_tag + "-")

            # per-iteration loss and accuracy of this epoch
            loss_list = []
            acc_list = []

            for cur_iteration in tt:
                # global step before this iteration, used as summary step
                cur_it = self.model.global_step_tensor.eval(self.sess)

                feed_dict = {
                    self.model.handle: self.model.training_handle,
                    self.model.is_training: True,
                    self.model.curr_learning_rate: curr_lr
                }

                # The last iteration of the epoch also fetches the segmented
                # sample images for the per-epoch image summary.
                is_last = cur_iteration >= steps_per_epoch - 1
                fetches = [
                    self.model.train_op, self.model.loss,
                    self.model.accuracy, self.model.merged_summaries
                ]
                if is_last:
                    fetches.append(self.model.segmented_summary)

                results = self.sess.run(fetches, feed_dict=feed_dict)
                loss, acc, summaries_merged = results[1:4]
                loss_list.append(loss)
                acc_list.append(acc)

                if is_last:
                    segmented_imgs = results[4]
                    total_loss = np.mean(loss_list)
                    total_acc = np.mean(acc_list)

                    # per-epoch summaries
                    self.add_summary(
                        cur_it,
                        summaries_dict={
                            'train-loss-per-epoch': total_loss,
                            'train-acc-per-epoch': total_acc,
                            'train_prediction_sample': segmented_imgs,
                        },
                        summaries_merged=summaries_merged)

                    # report
                    self.reporter.report_experiment_statistics(
                        'train-acc', epoch_tag, str(total_acc))
                    self.reporter.report_experiment_statistics(
                        'train-loss', epoch_tag, str(total_loss))
                    self.reporter.finalize()

                    # advance the global step
                    self.model.global_step_assign_op.eval(
                        session=self.sess,
                        feed_dict={self.model.global_step_input: cur_it + 1})

                    # The epoch tensor is updated last, so an interruption
                    # before this point repeats the epoch.
                    self.model.global_epoch_assign_op.eval(
                        session=self.sess,
                        feed_dict={
                            self.model.global_epoch_input: cur_epoch + 1
                        })

                    # console output
                    tt.close()
                    print(epoch_tag + "-loss:" + str(total_loss) +
                          "- acc:" + str(total_acc)[:6])

                    # the epoch is complete
                    break

                # regular iteration: write the merged summaries and advance
                # the global step
                self.add_summary(cur_it, summaries_merged=summaries_merged)
                self.model.global_step_assign_op.eval(
                    session=self.sess,
                    feed_dict={self.model.global_step_input: cur_it + 1})

            # periodic checkpoint
            if cur_epoch % self.args.save_every == 0:
                self.save_model()

            # periodic validation
            if cur_epoch % self.args.test_every == 0:
                self.test_per_epoch(
                    step=self.model.global_step_tensor.eval(self.sess),
                    epoch=self.model.global_epoch_tensor.eval(self.sess))

            # periodic learning-rate decay
            if cur_epoch % self.args.learning_decay_every == 0:
                curr_lr = curr_lr * self.args.learning_decay
                print('Current learning rate is ', curr_lr)

        print("Training Finished")

    def test_per_epoch(self, step, epoch):
        """Run one pass over the validation iterator and record the results.

        Accumulates loss, accuracy, inference time and metrics per batch. On
        the last batch it writes the validation summaries, reports the
        statistics and saves the best-model checkpoint when the mean IoU
        exceeds the value stored in the model's best-IoU tensor.

        :param step: step value the validation summaries are recorded at.
        :param epoch: epoch value used in the progress bar, report keys and
            console output.
        """
        print("Validation at step:" + str(step) + " at epoch:" + str(epoch) +
              " ..")

        steps = self.num_iterations_validation_per_epoch
        epoch_tag = 'epoch-' + str(epoch)

        # progress bar over the validation batches
        tt = tqdm(range(steps), total=steps, desc="Val-" + epoch_tag + "-")

        # per-batch loss, accuracy and inference time
        loss_list = []
        acc_list = []
        inf_list = []

        # reset metrics
        self.metrics.reset()

        # best IoU so far, used to decide whether to save this model
        max_iou = self.model.best_iou_tensor.eval(self.sess)

        # point the dataset at the validation data
        self.sess.run(self.model.validation_iterator.initializer)

        for cur_iteration in tt:
            feed_dict = {
                self.model.handle: self.model.validation_handle,
                self.model.is_training: False
            }

            # The last batch also fetches the segmented sample images for
            # the image summary.
            is_last = cur_iteration >= steps - 1
            fetches = [
                self.model.next_img, self.model.out_argmax,
                self.model.loss, self.model.accuracy
            ]
            if is_last:
                fetches.append(self.model.segmented_summary)

            start = time.time()
            results = self.sess.run(fetches, feed_dict=feed_dict)
            end = time.time()
            next_img, out_argmax, loss, acc = results[:4]

            loss_list.append(loss)
            acc_list.append(acc)
            inf_list.append(end - start)
            # NOTE(review): next_img[1] is presumably the label half of the
            # (image, label) pair -- confirm against the model.
            self.metrics.update_metrics_batch(out_argmax, next_img[1])

            if not is_last:
                continue

            segmented_imgs = results[4]

            # mean over batches
            total_loss = np.mean(loss_list)
            total_acc = np.mean(acc_list)
            mean_iou = self.metrics.compute_final_metrics(steps)
            mean_iou_arr = self.metrics.iou
            mean_inference = str(np.mean(inf_list)) + '-seconds'

            # summaries
            self.add_summary(step,
                             summaries_dict={
                                 'val-loss-per-epoch': total_loss,
                                 'val-acc-per-epoch': total_acc,
                                 'mean_iou_on_val': mean_iou,
                                 'val_prediction_sample': segmented_imgs,
                             })
            self.summary_writer.flush()

            # report
            self.reporter.report_experiment_statistics(
                'validation-acc', epoch_tag, str(total_acc))
            self.reporter.report_experiment_statistics(
                'validation-loss', epoch_tag, str(total_loss))
            self.reporter.report_experiment_statistics(
                'avg_inference_time_on_validation', epoch_tag,
                mean_inference)
            self.reporter.report_experiment_validation_iou(
                epoch_tag, str(mean_iou), mean_iou_arr)
            self.reporter.finalize()

            # console output
            tt.close()
            print("Val-" + epoch_tag + "-loss:" + str(total_loss) +
                  "-acc:" + str(total_acc)[:6] + "-mean_iou:" +
                  str(mean_iou))
            print("Last_max_iou: " + str(max_iou))

            if mean_iou > max_iou:
                print(
                    "This validation got a new best iou. so we will save this one"
                )
                # save the best model, then store the new maximum
                self.save_best_model()
                self.model.best_iou_assign_op.eval(
                    session=self.sess,
                    feed_dict={self.model.best_iou_input: mean_iou})
            else:
                print("hmm not the best validation epoch :/..")

            # the validation pass is complete
            break

    def test(self):
        """Evaluate the best checkpoint on the loaded test arrays.

        Restores the best-model checkpoint, then feeds the first
        ``self.test_data_len`` samples one at a time. Each prediction is
        saved to ``<out_dir>npy/<i>.npy`` and each segmented image to
        ``<out_dir>imgs/test_<i>.png``. Finally the mean loss, mean accuracy
        and mean IoU are printed.
        """
        print("Testing mode will begin NOW..")

        # load the best model checkpoint to test on it
        self.load_best_model()

        # progress bar over the individual test samples
        tt = tqdm(range(self.test_data_len))

        # per-sample loss and accuracy
        loss_list = []
        acc_list = []

        # reset metrics
        self.metrics.reset()

        for cur_iteration in tt:
            # single-sample batch; the loop index is the sample index
            x_batch = self.test_data['X'][cur_iteration:cur_iteration + 1]
            y_batch = self.test_data['Y'][cur_iteration:cur_iteration + 1]

            feed_dict = {
                self.model.x_pl: x_batch,
                self.model.y_pl: y_batch,
                self.model.is_training: False
            }

            # merged_summaries is still fetched so the run call is
            # unchanged, but its value is not used here.
            out_argmax, loss, acc, _, segmented_imgs = self.sess.run(
                [
                    self.model.out_argmax, self.model.loss,
                    self.model.accuracy, self.model.merged_summaries,
                    self.model.segmented_summary
                ],
                feed_dict=feed_dict)

            # persist the raw prediction and the rendered segmentation
            np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy',
                    out_argmax[0])
            plt.imsave(
                self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) +
                '.png', segmented_imgs[0])

            # log loss and acc
            loss_list.append(loss)
            acc_list.append(acc)

            # NOTE(review): meaning of the two trailing zeros is not visible
            # here -- confirm against Metrics.update_metrics.
            self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0)

        # mean over samples
        total_loss = np.mean(loss_list)
        total_acc = np.mean(acc_list)
        mean_iou = self.metrics.compute_final_metrics(self.test_data_len)

        # console output
        tt.close()
        print("Here the statistics")
        print("Total_loss: " + str(total_loss))
        print("Total_acc: " + str(total_acc)[:6])
        print("mean_iou: " + str(mean_iou))

        print("Plotting imgs")

    def finalize(self):
        """Finalize the reporter, close the summary writer and save a checkpoint."""
        self.reporter.finalize()
        self.summary_writer.close()
        # Saved last, through the regular saver (see save_model).
        self.save_model()