Пример #1
0
def trainAgent(configDir, exitOnFail=False):
    """Run one complete training sequence for the project stored in ``configDir``.

    The steps are: sync the agent weights on disk, do a throwaway launch of the
    web environment, record a new ``TrainingSequence``, run the random
    initialization if no testing step has completed yet, and finally run the
    main training loop.

    :param configDir: Passed straight to ``KwolaCoreConfiguration``.
    :param exitOnFail: Forwarded unchanged to ``runRandomInitialization`` and
                       ``runMainTrainingLoop``.
    """
    # set_start_method raises RuntimeError when the start method was already
    # fixed for this process; the existing choice is simply kept in that case.
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass

    config = KwolaCoreConfiguration(configDir)

    # Create the bugs directory. This is just temporary
    config.getKwolaUserDataDirectory("bugs")

    # Round-trip the agent through load/save to make sure all training
    # subprocesses are synced
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    # Create and destroy an environment, which forces a lot of the initial
    # javascript in the application to be loaded and translated. It also
    # verifies that the system can access the target URL prior to trying to
    # run a full sequence
    warmupEnvironment = WebEnvironment(config, sessionLimit=1)
    warmupEnvironment.shutdown()
    del warmupEnvironment

    newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
    trainingSequence = TrainingSequence(id=newSequenceId)
    trainingSequence.startTime = datetime.now()
    trainingSequence.status = "running"
    trainingSequence.trainingStepsCompleted = 0
    trainingSequence.saveToDisk(config)

    # Only testing steps that ran to completion count as existing experience.
    completedTestingSteps = []
    for step in TrainingManager.loadAllTestingSteps(config):
        if step.status == "completed":
            completedTestingSteps.append(step)

    if not completedTestingSteps:
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)
Пример #2
0
def trainAgent(configDir, exitOnFail=False):
    """Run (or resume) a training sequence for the project stored in ``configDir``.

    If the "training_sequences" user-data directory holds no non-lock file, a
    new ``TrainingSequence`` is created; otherwise the first listed file is
    unlocked and loaded. After training, charts are generated, the sequence is
    marked completed, and every folder named in the
    'train_agent_loop_delete_folders_on_finish' config entry is removed.

    :param configDir: Passed straight to ``KwolaCoreConfiguration``.
    :param exitOnFail: Forwarded unchanged to ``runRandomInitialization`` and
                       ``runMainTrainingLoop``.
    """
    # set_start_method raises RuntimeError when the start method was already
    # fixed for this process; the existing choice is simply kept in that case.
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass

    config = KwolaCoreConfiguration(configDir)

    # Create the bugs directory. This is just temporary
    config.getKwolaUserDataDirectory("bugs")

    # Round-trip the agent through load/save to make sure all training
    # subprocesses are synced
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    browsers = getAvailableBrowsers(config)

    # Create and destroy an environment, which forces a lot of the initial
    # javascript in the application to be loaded and translated. It also
    # verifies that the system can access the target URL prior to trying to
    # run a full sequence
    warmupEnvironment = WebEnvironment(config, sessionLimit=1, browser=browsers[0])
    warmupEnvironment.shutdown()
    del warmupEnvironment

    # Every entry of the sequence directory except lock files.
    sequenceFiles = []
    for fileName in os.listdir(config.getKwolaUserDataDirectory("training_sequences")):
        if ".lock" not in fileName:
            sequenceFiles.append(fileName)

    if sequenceFiles:
        # Resume: the id is the file name with the storage extensions removed.
        sequenceFileName = sequenceFiles[0]
        sequenceId = sequenceFileName
        for extension in (".pickle", ".json", ".gz"):
            sequenceId = sequenceId.replace(extension, "")

        sequenceFilePath = os.path.join(
            config.getKwolaUserDataDirectory("training_sequences"),
            sequenceFileName)
        LockedFile.clearLockFile(sequenceFilePath)
        trainingSequence = TrainingSequence.loadFromDisk(sequenceId, config)
    else:
        # Nothing to resume: start a fresh sequence with zeroed counters.
        newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
        trainingSequence = TrainingSequence(id=newSequenceId)
        trainingSequence.startTime = datetime.now()
        trainingSequence.status = "running"
        trainingSequence.trainingLoopsCompleted = 0
        trainingSequence.trainingStepsLaunched = 0
        trainingSequence.testingStepsLaunched = 0
        trainingSequence.saveToDisk(config)

    # Only testing steps that ran to completion count as existing experience.
    completedTestingSteps = []
    for step in TrainingManager.loadAllTestingSteps(config):
        if step.status == "completed":
            completedTestingSteps.append(step)

    if not completedTestingSteps:
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    generateAllCharts(config, enableCumulativeCoverage=True)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)

    for folder in config['train_agent_loop_delete_folders_on_finish']:
        shutil.rmtree(config.getKwolaUserDataDirectory(folder))
Пример #3
0
    def runTesting(self):
        """Execute one testing step from start to finish and report the outcome.

        Any exception raised while testing is caught, logged at error level and
        reported through the returned dictionary instead of propagating.

        :return: dict with 'success'; 'testingStepId' and
                 'successfulExecutionSessions' are added once the run reaches
                 those points, and 'exception' holds the formatted traceback
                 when the run failed.
        """
        pid = os.getpid()
        getLogger().info(f"[{pid}] Starting New Testing Sequence")

        resultValue = {'success': True}

        try:
            # set_start_method raises RuntimeError when the start method was
            # already fixed for this process; keep the existing choice then.
            try:
                multiprocessing.set_start_method('spawn')
            except RuntimeError:
                pass

            self.testStep.startTime = datetime.now()
            self.testStep.status = "running"
            self.testStep.saveToDisk(self.config)

            resultValue["testingStepId"] = str(self.testStep.id)

            self.createExecutionSessions()
            self.createTestingSubprocesses()

            for plugin in self.testingStepPlugins:
                plugin.testingStepStarted(self.testStep, self.executionSessions)

            self.environment = WebEnvironment(
                config=self.config,
                executionSessions=self.executionSessions)

            self.loopTime = datetime.now()
            while self.stepsRemaining > 0:
                self.stepsRemaining -= 1

                # Stop early once every session has been dropped.
                self.removeBadSessions()
                if len(self.executionSessions) == 0:
                    break

                self.executeSingleAction()

                # Restart one subprocess on every step when the period is 1,
                # otherwise once per period (when the remainder hits period - 1).
                resetPeriod = self.config['testing_reset_agent_period']
                if resetPeriod == 1 or self.stepsRemaining % resetPeriod == (resetPeriod - 1):
                    self.restartOneTestingSubprocess()

                self.step += 1

            self.killAndJoinTestingSubprocesses()
            self.removeBadSessions()

            # Ensure all the trace objects get saved to disc
            self.traceSaveExecutor.shutdown()

            self.environment.runSessionCompletedHooks()

            self.savePlainVideoFiles()

            for finishedSession in self.executionSessions:
                finishedSession.endTime = datetime.now()
                finishedSession.saveToDisk(self.config)

            # We shutdown the environment before generating the annotated videos in order
            # to conserve memory, since the environment is no longer needed after this point
            self.shutdownEnvironment()

            self.testStep.status = "completed"
            self.testStep.endTime = datetime.now()
            self.testStep.executionSessions = [
                finishedSession.id for finishedSession in self.executionSessions
            ]

            for plugin in self.testingStepPlugins:
                plugin.testingStepFinished(self.testStep, self.executionSessions)

            self.testStep.saveToDisk(self.config)
            resultValue['successfulExecutionSessions'] = len(self.testStep.executionSessions)
            resultValue['success'] = True

            # Remove the local trace pickle files now that the step is saved.
            localTraceFiles = (
                fileName
                for traceList in self.executionSessionTraceLocalPickleFiles
                for fileName in traceList
            )
            for fileName in localTraceFiles:
                os.unlink(fileName)

        except Exception:
            formattedTraceback = traceback.format_exc()
            getLogger().error(
                f"[{pid}] Unhandled exception occurred during testing sequence:\n{formattedTraceback}"
            )
            resultValue['success'] = False
            resultValue['exception'] = formattedTraceback

        # This print statement will trigger the parent manager process to kill this process.
        getLogger().info(f"[{pid}] Finished Running Testing Sequence!")

        return resultValue
Пример #4
0
# -*- coding: utf-8 -*-
import tempfile,os,sys,pickle
# Pick the Process implementation by platform: the standard-library
# multiprocessing.Process on Windows, billiard's Process everywhere else.
# On the non-Windows path the 'spawn' start method is selected at import time.
if sys.platform == 'win32':
    from multiprocessing import Process
else:
    import billiard
    billiard.set_start_method('spawn')
    from billiard import Process
    
import matplotlib
# Select the Qt5Agg backend for matplotlib.
# NOTE(review): presumably this must run before the hydroid import below so the
# backend is fixed before any plotting module is loaded - confirm.
matplotlib.use('Qt5Agg')
from hydroid.HYDROIDexp import assign_peaks_interactive,call_peaks_interactive,fit_peaks,plot_prof_on_seq



class hydroidConfig(object):
    def __init__(self,laneList):
        self.laneList=laneList
        #we use mkstemp here because a regular tempfile cannot be opened multiple times on Windows
        self.fd,self.configFile=tempfile.mkstemp()
        with os.fdopen(self.fd,'w') as file:
            file.write('''#
#column - name of column with an array of profile values in lane_profile.xls
#lanme - arbitrary name of the gel lane (e.g. experiment label).
#leftlim, rightlim - indexes of values in profile array that specify the datarange that will be analyzed, in NaN dataset is not truncated
#peakthresh - parameter for automatic peak identification algorithm, use lower values to identify more subtle peaks. However, this may lead to false positives.
#min_dist_left - minimal distance between the individual peaks allowed at the left side of the data range.
#min_dist_right - minimal distance between the individual peaks allowed at the right side of the data range.
#segments - number of segment the data range is split into for linear interpolation of the min_dist value for the automatic peak identification algorithm.
#base - try to substract linear baseline from the profile before peak identification attempt.
#interpolate - try to guess the location of unidentified peaks from the local average spacing betwee the nearby peaks.
Пример #5
0
def trainAgent(config, exitOnFail=False):
    """Run (or resume) a training sequence, then chart, email and clean up.

    If the "training_sequences" user-data directory holds no non-lock file, a
    new ``TrainingSequence`` is created; otherwise the first listed file is
    loaded. The web environment is launched once as a check only when no
    testing step has completed yet, and random initialization runs while fewer
    than 'training_random_initialization_sequences' testing steps have
    completed.

    :param config: Passed straight to ``KwolaCoreConfiguration``; the name is
                   rebound to the resulting configuration object.
    :param exitOnFail: Forwarded unchanged to ``runRandomInitialization`` and
                       ``runMainTrainingLoop``.
    """
    # set_start_method raises RuntimeError when the start method was already
    # fixed for this process; the existing choice is simply kept in that case.
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        pass

    config = KwolaCoreConfiguration(config)

    # Optionally block, polling once per second, until no "kwola" process is
    # reported as running.
    if config['train_agent_loop_wait_for_other_kwola_processes_to_exit'] and checkIfProcessRunning("kwola"):
        getLogger().info(
            "Waiting for the other Kwola process to finish running. "
            "If you want to run multiple Kwola processes at once, please change the "
            "train_agent_loop_wait_for_other_kwola_processes_to_exit configuration variable."
        )
        while checkIfProcessRunning("kwola"):
            time.sleep(1)

    # Round-trip the agent through load/save to make sure all training
    # subprocesses are synced
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    # Every entry of the sequence directory except lock files.
    sequenceFiles = []
    for fileName in os.listdir(config.getKwolaUserDataDirectory("training_sequences")):
        if ".lock" not in fileName:
            sequenceFiles.append(fileName)

    if sequenceFiles:
        # Resume: the id is the file name with the storage extensions removed.
        sequenceId = sequenceFiles[0]
        for extension in (".pickle", ".json", ".gz", ".enc"):
            sequenceId = sequenceId.replace(extension, "")

        trainingSequence = TrainingSequence.loadFromDisk(sequenceId, config)
    else:
        # Nothing to resume: start a fresh sequence with zeroed counters.
        newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
        trainingSequence = TrainingSequence(id=newSequenceId)
        trainingSequence.startTime = datetime.now()
        trainingSequence.status = "running"
        trainingSequence.trainingLoopsCompleted = 0
        trainingSequence.trainingStepsLaunched = 0
        trainingSequence.testingStepsLaunched = 0
        trainingSequence.saveToDisk(config)

    # Only testing steps that ran to completion count as existing experience.
    completedTestingSteps = []
    for step in TrainingManager.loadAllTestingSteps(config):
        if step.status == "completed":
            completedTestingSteps.append(step)
    completedCount = len(completedTestingSteps)

    if completedCount == 0:
        browsers = getAvailableBrowsers(config)

        # Create and destroy an environment, which forces a lot of the initial
        # javascript in the application to be loaded and translated. It also
        # verifies that the system can access the target URL prior to trying
        # to run a full sequence
        warmupEnvironment = WebEnvironment(config, sessionLimit=1, browser=browsers[0])
        warmupEnvironment.shutdown()
        del warmupEnvironment

    if completedCount < config['training_random_initialization_sequences']:
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    loopsCompleted = runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    generateAllCharts(config, applicationId=None, enableCumulativeCoverage=True)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)

    # Results are only mailed when enabled and at least one loop actually ran.
    if config['train_agent_loop_email_results'] and loopsCompleted > 0:
        from ..components.utils.email import sendExperimentResults
        sendExperimentResults(config)

    for folder in config['train_agent_loop_delete_folders_on_finish']:
        shutil.rmtree(config.getKwolaUserDataDirectory(folder))
Пример #6
0
    def runTesting(self):
        """Execute one testing step from start to finish and report the outcome.

        Exceptions raised while testing are caught and reported through the
        returned dictionary. ``WebDriverException`` and
        ``ProxyVerificationFailed`` are logged at warning level, everything
        else at error level.

        :return: dict with 'success'; 'testingStepId' and
                 'successfulExecutionSessions' are added once the run reaches
                 those points, and 'exception' holds the formatted traceback
                 when an exception was caught. 'success' is also False when no
                 execution session survived the run.
        """
        if self.config['print_configuration_on_startup']:
            startupMessage = f"Starting New Testing Sequence with configuration:\n{pformat(self.config.configData)}"
        else:
            startupMessage = "Starting New Testing Sequence"
        getLogger().info(startupMessage)

        resultValue = {'success': True}

        def recordFailure(logMethod):
            # Shared by all except branches; must be called while the exception
            # is still being handled so format_exc() can see it.
            formattedTraceback = traceback.format_exc()
            logMethod(
                f"Unhandled exception occurred during testing sequence:\n{formattedTraceback}"
            )
            resultValue['success'] = False
            resultValue['exception'] = formattedTraceback

        try:
            # set_start_method raises RuntimeError when the start method was
            # already fixed for this process; keep the existing choice then.
            try:
                multiprocessing.set_start_method('spawn')
            except RuntimeError:
                pass

            self.testStep.startTime = datetime.now()
            self.testStep.status = "running"
            self.testStep.saveToDisk(self.config)

            resultValue["testingStepId"] = str(self.testStep.id)

            self.createExecutionSessions()
            if self.config['testing_enable_prediction_subprocess']:
                self.createTestingSubprocesses()

            for plugin in self.testingStepPlugins:
                plugin.testingStepStarted(self.testStep, self.executionSessions)

            self.environment = WebEnvironment(
                config=self.config,
                executionSessions=self.executionSessions,
                plugins=self.webEnvironmentPlugins,
                browser=self.browser,
                windowSize=self.windowSize)

            self.loopTime = datetime.now()
            while self.stepsRemaining > 0:
                self.stepsRemaining -= 1

                # Stop early once every session has been dropped.
                self.removeBadSessions()
                if len(self.executionSessions) == 0:
                    break

                self.executeSingleAction()

                if self.config['testing_enable_prediction_subprocess']:
                    # Restart one subprocess on every step when the period is 1,
                    # otherwise once per period (remainder hits period - 1).
                    resetPeriod = self.config['testing_reset_agent_period']
                    if resetPeriod == 1 or self.stepsRemaining % resetPeriod == (resetPeriod - 1):
                        self.restartOneTestingSubprocess()

                self.step += 1

            if self.config['testing_enable_prediction_subprocess']:
                self.killAndJoinTestingSubprocesses()
            self.removeBadSessions()

            # Compute the code prevalence scores for all of the traces
            allTraces = []
            for sessionTraces in self.executionSessionTraces:
                allTraces.extend(sessionTraces)
            self.agent.symbolMapper.load()
            self.agent.symbolMapper.computeCodePrevalenceScores(allTraces)

            # Ensure all the trace objects get saved to disc
            for trace in allTraces:
                self.traceSaveExecutor.submit(TestingStepManager.saveTrace, trace, self.config)

            self.traceSaveExecutor.shutdown()

            self.environment.runSessionCompletedHooks()

            for finishedSession in self.executionSessions:
                finishedSession.endTime = datetime.now()
                finishedSession.saveToDisk(self.config)

            # We shutdown the environment before generating the annotated videos in order
            # to conserve memory, since the environment is no longer needed after this point
            self.shutdownEnvironment()

            # A step with no surviving session is a failure; otherwise record
            # the browser and the user agent of the first session on the step.
            if len(self.executionSessions) != 0:
                self.testStep.status = "completed"
                self.testStep.browser = self.browser
                self.testStep.userAgent = self.executionSessions[0].userAgent
            else:
                self.testStep.status = "failed"

            self.testStep.endTime = datetime.now()
            self.testStep.executionSessions = [
                finishedSession.id for finishedSession in self.executionSessions
            ]

            if len(self.executionSessions) > 0:
                for plugin in self.testingStepPlugins:
                    plugin.testingStepFinished(self.testStep, self.executionSessions)

            for finishedSession in self.executionSessions:
                finishedSession.status = "completed"
                finishedSession.saveToDisk(self.config)

            self.testStep.saveToDisk(self.config)
            successfulCount = len(self.testStep.executionSessions)
            resultValue['successfulExecutionSessions'] = successfulCount
            resultValue['success'] = successfulCount != 0

            # Remove the local trace pickle files now that the step is saved.
            localTraceFiles = (
                fileName
                for traceList in self.executionSessionTraceLocalPickleFiles
                for fileName in traceList
            )
            for fileName in localTraceFiles:
                os.unlink(fileName)

        except selenium.common.exceptions.WebDriverException:
            # This error just happens sometimes. It has something to do with the chrome process failing to interact correctly
            # with mitmproxy. Its not at all clear what causes it, but the system can't auto retry from it unless the whole container
            # is killed. So we just explicitly catch it here so we don't trigger an error level log message, which gets sent to slack.
            # The manager process will safely restart this testing step.
            recordFailure(getLogger().warning)
        except ProxyVerificationFailed:
            # Handle this error gracefully without an error level message. This happens more often when our own servers go down
            # than when the proxy is actually not functioning
            recordFailure(getLogger().warning)
        except Exception:
            recordFailure(getLogger().error)

        # This print statement will trigger the parent manager process to kill this process.
        getLogger().info("Finished Running Testing Sequence!")

        return resultValue
Пример #7
0
    def runTraining(self):
        """Run a single training step and report whether it succeeded.

        The method has two phases. Setup (GPU, training step record, testing
        steps, agent, batch-prep subprocesses, plugin start hooks) returns
        early on any failure. The learning phase then iterates until
        'iterations_per_training_step' iterations are done or
        ``learnFromBatches`` reports failure, saves the training step and the
        agent, and always removes the batch directory afterwards.

        :return: dict. On the normal path it contains "trainingStepId" and
                 "success", plus "exception" (formatted traceback) when the
                 learning phase raised. When setup fails, or when there are no
                 testing steps to train on, it contains only "success" (False)
                 and "exception" (an error message).
        """
        success = True
        exception = None

        try:
            # set_start_method raises RuntimeError when the start method was
            # already fixed for this process; keep the existing choice then.
            try:
                multiprocessing.set_start_method('spawn')
            except RuntimeError:
                pass
            if self.config['print_configuration_on_startup']:
                getLogger().info(f"Starting Training Step with configuration:\n{pformat(self.config.configData)}")
            else:
                getLogger().info(f"Starting Training Step")

            self.initializeGPU()
            self.createTrainingStep()
            self.loadTestingSteps()

            if len(self.testingSteps) == 0:
                errorMessage = f"Error, no test sequences to train on for training step."
                getLogger().warning(f"{errorMessage}")
                getLogger().info(f"==== Training Step Completed ====")
                return {"success": False, "exception": errorMessage}

            self.agent = DeepLearningAgent(config=self.config, whichGpu=self.gpu)
            self.agent.initialize()
            self.agent.load()

            self.createSubproccesses()

            for plugin in self.plugins:
                plugin.trainingStepStarted(self.trainingStep)

        except Exception:
            errorMessage = f"Error occurred during initiation of training! {traceback.format_exc()}"
            getLogger().warning(f"{errorMessage}")
            return {"success": False, "exception": errorMessage}

        try:
            self.threadExecutor = concurrent.futures.ThreadPoolExecutor(
                    max_workers=self.config['training_max_batch_prep_thread_workers'] * self.config['training_batch_prep_subprocesses'])

            self.queueBatchesForPrecomputation()

            while self.trainingStep.numberOfIterationsCompleted < self.config['iterations_per_training_step']:
                loopStart = datetime.now()

                self.updateBatchPrepStarvedState()
                batches = self.fetchBatchesForIteration()

                # A falsy result aborts the loop; the value is also what gets
                # reported as "success" in the return data.
                success = self.learnFromBatches(batches)
                if not success:
                    break

                if self.trainingStep.numberOfIterationsCompleted % self.config['training_update_target_network_every'] == (self.config['training_update_target_network_every'] - 1):
                    getLogger().info(f"Updating the target network weights to the current primary network weights.")
                    self.agent.updateTargetNetwork()

                self.trainingStep.numberOfIterationsCompleted += 1

                # Only the process without a GPU assignment, or the one on GPU 0,
                # prints progress and writes the training step to disk.
                if self.trainingStep.numberOfIterationsCompleted % self.config['print_loss_iterations'] == (self.config['print_loss_iterations'] - 1):
                    if self.gpu is None or self.gpu == 0:
                        getLogger().info(f"Completed {self.trainingStep.numberOfIterationsCompleted + 1} batches. Overall average time per batch: {numpy.average(self.loopTimes[-25:]):.3f}. Core learning time: {numpy.average(self.coreLearningTimes[-25:]):.3f}")
                        self.printMovingAverageLosses()
                        if self.config['print_cache_hit_rate']:
                            getLogger().info(f"Batch cache hit rate {100 * numpy.mean(self.recentCacheHits[-self.config['print_cache_hit_rate_moving_average_length']:]):.0f}%")

                if self.trainingStep.numberOfIterationsCompleted % self.config['iterations_between_db_saves'] == (self.config['iterations_between_db_saves'] - 1):
                    if self.gpu is None or self.gpu == 0:
                        self.trainingStep.saveToDisk(self.config)

                for plugin in self.plugins:
                    plugin.iterationCompleted(self.trainingStep)

                loopEnd = datetime.now()
                self.loopTimes.append((loopEnd - loopStart).total_seconds())

            getLogger().info(f"Finished the core training loop. Saving the training step {self.trainingStep.id}")
            self.trainingStep.endTime = datetime.now()

            # The loop can end with zero completed iterations (learnFromBatches
            # failed on the first batch, or 'iterations_per_training_step' is 0).
            # Dividing by zero here would surface as a misleading
            # "error while learning", so report 0.0 in that case instead.
            completedIterations = self.trainingStep.numberOfIterationsCompleted
            elapsedSeconds = (self.trainingStep.endTime - self.trainingStep.startTime).total_seconds()
            if completedIterations > 0:
                self.trainingStep.averageTimePerIteration = elapsedSeconds / completedIterations
            else:
                self.trainingStep.averageTimePerIteration = 0.0

            self.trainingStep.averageLoss = float(numpy.mean(self.trainingStep.totalLosses))
            self.trainingStep.status = "completed"

            for plugin in self.plugins:
                plugin.trainingStepFinished(self.trainingStep)

            self.trainingStep.saveToDisk(self.config)

            self.threadExecutor.shutdown(wait=True)

            self.saveAgent()
            self.shutdownAndJoinSubProcesses()

        except Exception:
            # Format the traceback once and use it for both the log and the result.
            exception = traceback.format_exc()
            getLogger().error(f"Error occurred while learning sequence!\n{exception}")
            success = False
            # NOTE(review): on this path the thread executor and the batch-prep
            # subprocesses are not shut down here - confirm the parent process
            # handles that when it kills this process.
        finally:
            # rmtree removes the directory together with everything inside it,
            # including nested directories, so no per-file unlink is needed.
            shutil.rmtree(self.batchDirectory)

            del self.agent

        # This print statement will trigger the parent manager process to kill this process.
        getLogger().info(f"==== Training Step Completed ====")
        returnData = {"trainingStepId": str(self.trainingStep.id), "success": success}
        if exception is not None:
            returnData['exception'] = exception

        return returnData