Example #1
    def handleImbalanceProblem(self, dataset, labels, model, epochs):
        # Compile the wrapper, then work with the underlying Keras model.
        model.getCompiledModel()
        model = model.model

        test_labels_orig = test_labels_pred = None

        train_images, test_images, train_labels, test_labels = DataProcessor.trainTestSplit(
            dataset, labels)
        train_images, test_images, train_labels, test_labels = DataProcessor.prepareDatasetForFit(
            train_images, test_images, train_labels, test_labels)

        # DatasetSequence feeds batches and can rebalance the training set
        # between epochs (see the on_epoch_end examples below).
        dataSequence = DatasetSequence(train_images, train_labels, test_images,
                                       test_labels, model, epochs)
        history = model.fit(dataSequence,
                            epochs=epochs,
                            validation_data=(test_images, test_labels))

        predictions = model.predict(dataSequence.test_data)
        # One-hot multi-class output vs. a single sigmoid unit.
        if predictions.shape[1] > 1:
            test_labels_orig = np.argmax(dataSequence.test_labels, axis=1)
            test_labels_pred = np.argmax(predictions, axis=1)
        else:
            test_labels_orig = dataSequence.test_labels
            test_labels_pred = np.where(predictions > 0.5, 1, 0)
        logger.info(
            classification_report(y_true=test_labels_orig,
                                  y_pred=test_labels_pred,
                                  zero_division=1))
        logger.info(
            precision_recall_fscore_support(y_true=test_labels_orig,
                                            y_pred=test_labels_pred,
                                            average='weighted'))
        return history
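`DataProcessor.trainTestSplit` and `prepareDatasetForFit` are not shown in these examples. A minimal sketch of what such helpers could look like, assuming a stratified sklearn split and simple image normalization; only the call signatures come from the examples, the bodies are assumptions:

import numpy as np
from sklearn.model_selection import train_test_split


class DataProcessor:

    @staticmethod
    def trainTestSplit(dataset, labels):
        # Stratified 80/20 split so both partitions keep the class ratios
        # (assumed; the original split parameters are not shown).
        return train_test_split(dataset, labels, test_size=0.2,
                                stratify=labels)

    @staticmethod
    def prepareDatasetForFit(train_images, test_images, train_labels,
                             test_labels):
        # Scale pixel values into [0, 1]; a common preprocessing step for
        # image models, assumed here.
        return (train_images / 255.0, test_images / 255.0,
                np.asarray(train_labels), np.asarray(test_labels))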
Example #2
    def handleImbalanceProblem(self, dataset, labels, model, epochs):
        # Compile with a custom mean-false-error loss; runEagerly=True lets
        # the Python-level loss execute eagerly.
        model.getCompiledModel(loss=self.meanFalseError, runEagerly=True)
        model = model.model

        train_images, test_images, train_labels, test_labels = DataProcessor.trainTestSplit(
            dataset, labels)
        train_images, test_images, train_labels, test_labels = DataProcessor.prepareDatasetForFit(
            train_images, test_images, train_labels, test_labels)

        history = model.fit(train_images,
                            train_labels,
                            epochs=epochs,
                            validation_data=(test_images, test_labels))

        predictions = model.predict(test_images)
        if predictions.shape[1] > 1:
            test_labels_orig = np.argmax(test_labels, axis=1)
            test_labels_pred = np.argmax(predictions, axis=1)
        else:
            test_labels_orig = test_labels
            test_labels_pred = np.where(predictions > 0.5, 1, 0)
        logger.info(
            classification_report(y_true=test_labels_orig,
                                  y_pred=test_labels_pred))

        logger.info(
            precision_recall_fscore_support(y_true=test_labels_orig,
                                            y_pred=test_labels_pred,
                                            average='weighted'))
        return history
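`self.meanFalseError` is passed to the compile step but not defined in the snippet. A minimal sketch of a mean-false-error style loss for the binary case, where the squared error is averaged per class so the minority class is not drowned out; this body is an assumption, not the original implementation:

import tensorflow as tf


def meanFalseError(y_true, y_pred):
    # Mean the squared error separately over negatives (false positive
    # error) and positives (false negative error), then sum the two, so
    # each class contributes equally regardless of its size.
    y_true = tf.cast(y_true, tf.float32)
    sq_err = tf.square(y_true - y_pred)
    neg = tf.cast(tf.equal(y_true, 0.0), tf.float32)
    pos = 1.0 - neg
    fpe = tf.reduce_sum(sq_err * neg) / tf.maximum(tf.reduce_sum(neg), 1.0)
    fne = tf.reduce_sum(sq_err * pos) / tf.maximum(tf.reduce_sum(pos), 1.0)
    return fpe + fne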
Example #3
    def __init__(self, topology_model):

        self.topology = topology_model

        # create a workspace
        self.base_path = Compiler.generate_basic_workspace()
        # code file path
        file_name = os.path.join(self.base_path, 'topology.json')

        try:
            try:
                with open(file_name, 'w') as text_file:
                    t_dict = self.build_backend_json(self.topology)
                    t_json = json.dumps(t_dict)
                    logger.info(t_json)
                    text_file.write(t_json)
            except OSError as e:
                # OSError has no .message attribute in Python 3; use str(e).
                raise TopologyWriteException(str(e))

            self.generate_pom_file(
                base_path=self.base_path,
                name=self.topology[TopologyConsts.FIELD_NAME],
                version='1.0',
                dependencies=self.build_dependencies())

        except (ModuleWriteException, ModuleErrorCompilingException):
            # Re-raise without wrapping so the original traceback survives.
            raise
Example #4
def get_log(topology_name, num_lines=10):
    filenames = StormUI.getWorkersByTopologyName(topology_name)
    lines = ""
    if filenames:
        logger.debug("Filenames for topology " + topology_name + " -> " +
                     str(filenames))
        for filename in filenames:
            logger.info("Topology :" + topology_name + " - LogFilename: " +
                        filename)
            # get log file from storm cluster
            n_lines = int(num_lines) * 100
            content = StormUI.getFile(filename, n_lines)
            try:
                # Remove HTML tags from the Storm logviewer page (port 8000)
                logcontent = BeautifulSoup(content, "lxml").find(
                    "pre", {"id": "logContent"})
                if logcontent:
                    lines += "\n###################################################\n"
                    hostname, port, name = LogViewer.parse_worker_info(filename)
                    lines += "Log from worker: " + hostname + " - " + name + "\n"
                    lines += "###################################################\n"
                    lines += logcontent.getText()
                    logger.debug("Getting " +
                                 str(len(lines.splitlines())) + " lines.")
            except Exception as e:
                return "Error parsing data from Storm UI: " + str(e)
    return lines
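The parsing step above depends on Storm's logviewer page wrapping the log body in a `<pre id="logContent">` element. The extraction in isolation, with fabricated HTML standing in for the Storm UI response:

from bs4 import BeautifulSoup

html = '<html><body><pre id="logContent">line 1\nline 2</pre></body></html>'
logcontent = BeautifulSoup(html, "lxml").find("pre", {"id": "logContent"})
print(logcontent.getText())  # prints the two log lines without the HTML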
Example #5
    def copyID(self, keyModel, username=None, host=None):
        from logger.Logger import logger
        self.keyModel = keyModel
        self.pubkey = self.keyModel.getPubKey()

        # Use of a singleton here means that we should be able to do SSO on
        # any AAF/Shibboleth web service. However we might have to guess the IdP.
        self.session = cvlsshutils.RequestsSessionSingleton.RequestsSessionSingleton().GetSession()
        destURL = 'https://portal.synchrotron.org.au:443/api/v1/oauth/token'
        auth = cvlsshutils.ASyncAuth.ASyncAuth(self.session, destURL, parent=self.parent,
                                               extraParams=self.extraParams, **self.kwargs)
        apitoken = auth.gettoken()
        self.updateDict = auth.getUpdateDict()
        try:
            self.postKey(apitoken)
        except Exception:
            raise
        logger.info('copied pub key %s to user account %s' % (self.pubkey, self.username))
Example #6
    def balanceDataset(self, dataset, labels):
        classes, classesCount = np.unique(labels, return_counts=True)
        logger.info(classesCount)
        highestClassCount = np.amax(classesCount)
        # Random oversampling: duplicate randomly chosen minority-class
        # samples until every class matches the majority count.
        for classId, classCount in zip(classes, classesCount):
            if classCount < highestClassCount:
                amountToCopy = highestClassCount - classCount
                logger.info("Oversampling class {} with {} samples".format(
                    classId, amountToCopy))
                classIndexes = np.where(labels == classId)[0].tolist()
                indexesToCopy = random.choices(classIndexes, k=amountToCopy)
                copiedDataset = np.copy(dataset[indexesToCopy, ])
                copiedLabels = np.copy(labels[indexesToCopy, ])

                dataset = np.concatenate((dataset, copiedDataset))
                labels = np.concatenate((labels, copiedLabels))
        return dataset, labels
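A quick sanity check for the oversampling is to run it on a toy imbalanced dataset and compare class counts; `handler` below stands for an instance of the class defining `balanceDataset`, and the data is made up:

import numpy as np

dataset = np.arange(20).reshape(10, 2)   # 10 samples, 2 features
labels = np.array([0] * 7 + [1] * 3)     # class 1 is under-represented

dataset, labels = handler.balanceDataset(dataset, labels)
print(np.unique(labels, return_counts=True))
# expected: (array([0, 1]), array([7, 7]))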
Example #7
def add_java_module(module_name, module_type):
    cmd_launch = [
        "cd", conf.CLASSES_TMP_PATH + ";", conf.JAVA_JAR_BIN, "uf",
        conf.SINFONIER_LAST_JAR, module_type + module_name + ".class"
    ]
    # Run the jar update before deleting the class file it packs.
    result = CommandExecutor.execute(cmd_launch, capture_out=True)
    os.remove(conf.CLASSES_TMP_PATH + module_type + module_name + ".class")
    with open(conf.CLASSES_TMP_PATH + module_type + module_name + ".info") as f:
        listclasses = f.read().splitlines()
    for classjava in listclasses:
        logger.info("Updating JAR with class " + classjava)
        cmd_launch = [
            "cd", conf.CLASSES_TMP_PATH + ";", conf.JAVA_JAR_BIN, "uf",
            conf.SINFONIER_LAST_JAR,
            module_type + classjava.replace("$", "\\$")  # escape $ for the shell
        ]
        CommandExecutor.execute(cmd_launch, capture_out=True)
        os.remove(conf.CLASSES_TMP_PATH + module_type + classjava)
    os.remove(conf.CLASSES_TMP_PATH + module_type + module_name + ".info")
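The `uf` flags rely on the jar tool's update mode: `jar uf <jarfile> <files>` adds or replaces the named entries in an existing archive. The same call without the custom executor might look like this (paths fabricated):

import subprocess

# Update sinfonier.jar in place with a freshly compiled class.
subprocess.run(["jar", "uf", "sinfonier.jar", "bolts/MyBolt.class"],
               cwd="/tmp/classes", check=True)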
Example #8
def request_job():
    start_time = time.time()
    content = request.get_json(silent=True)
    priority = content['priority']
    cpu = content['cpu']
    logger.info('Priority : %s, CPU : %s' % (priority, cpu))

    if priority == 'None':
        logger.info('Priority is None.')
        priority = -1
        output = check_output('bash /STREAM/run.sh', shell=True, stderr=STDOUT)
    else:
        output = check_output('bash /STREAM/run.sh %s' % priority,
                              shell=True,
                              stderr=STDOUT)
    output = output.decode('utf-8').replace('\n', '')
    end_time = time.time()

    store_result(priority, output, cpu, start_time, end_time)
    return json.dumps({'status': 'success'})
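Assuming the handler above is registered on a route such as `/job` (the path is a guess; the routing is not shown), the endpoint can be exercised with a small client:

import requests

resp = requests.post("http://localhost:5000/job",
                     json={"priority": "5", "cpu": "2"})
print(resp.json())  # {'status': 'success'} once run.sh finishes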
Example #9
    def handleImbalanceProblem(self, dataset, labels, model, epochs):
        model.getCompiledModel()
        model = model.model

        # 'balanced' gives each class the weight
        # n_samples / (n_classes * class_count), so rare classes weigh more.
        flatten = labels.flatten()
        class_weights = class_weight.compute_class_weight(
            'balanced', classes=np.unique(labels), y=flatten)
        classWeightsMap = {
            idx: ratio
            for idx, ratio in enumerate(class_weights)
        }

        train_images, test_images, train_labels, test_labels = DataProcessor.trainTestSplit(
            dataset, labels)
        train_images, test_images, train_labels, test_labels = DataProcessor.prepareDatasetForFit(
            train_images, test_images, train_labels, test_labels)

        history = model.fit(train_images,
                            train_labels,
                            epochs=epochs,
                            validation_data=(test_images, test_labels),
                            class_weight=classWeightsMap)

        predictions = model.predict(test_images)
        if predictions.shape[1] > 1:
            test_labels_orig = np.argmax(test_labels, axis=1)
            test_labels_pred = np.argmax(predictions, axis=1)
        else:
            test_labels_orig = test_labels
            test_labels_pred = np.where(predictions > 0.5, 1, 0)
        logger.info(
            classification_report(y_true=test_labels_orig,
                                  y_pred=test_labels_pred))
        logger.info(
            precision_recall_fscore_support(y_true=test_labels_orig,
                                            y_pred=test_labels_pred,
                                            average='weighted'))
        return history
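The weight map can be checked in isolation: sklearn's 'balanced' heuristic assigns each class `n_samples / (n_classes * class_count)`, so the rarer class receives the larger weight (toy labels below):

import numpy as np
from sklearn.utils import class_weight

labels = np.array([0] * 8 + [1] * 2)
weights = class_weight.compute_class_weight(
    'balanced', classes=np.unique(labels), y=labels)
print(dict(enumerate(weights)))  # {0: 0.625, 1: 2.5}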
Example #10
    def on_epoch_end(self):
        self.counter += 1
        if self.counter == 10:
            logger.info("return")
            return

        predictions = self.model.predict(self.train_data)
        loss = tfa.losses.sigmoid_focal_crossentropy(
            tf.cast(self.train_labels, tf.float32), predictions)

        if predictions.shape[1] > 1:
            y_true = np.argmax(self.train_labels, axis=1)
            y_pred = np.argmax(predictions, axis=1)
        else:
            y_true = self.train_labels
            y_pred = np.where(predictions > 0.5, 1, 0)

        classes, classesCount = np.unique(y_true, return_counts=True)
        highestClassCount = np.amax(classesCount)

        for classId, classCount in zip(classes, classesCount):

            if classCount < highestClassCount:
                # Hard example mining: pick the minority samples with the
                # highest focal loss and duplicate them.
                amountToCopy = highestClassCount - classCount
                classIndexes = np.where(y_true == classId)[0].tolist()
                classesLoss = [(idx, loss[idx]) for idx in classIndexes]
                classesLoss.sort(key=lambda x: x[1])
                indexesToCopy = [
                    x[0] for x in classesLoss[-self.hardMiningSample:]
                ]

                for i in range(int(amountToCopy / self.hardMiningSample)):
                    copiedData = np.copy(self.train_data[indexesToCopy, ])
                    copiedLabels = np.copy(self.train_labels[indexesToCopy, ])
                    self.train_data = np.concatenate(
                        (self.train_data, copiedData))
                    self.train_labels = np.concatenate(
                        (self.train_labels, copiedLabels))

        logger.info(np.unique(self.train_data)[1])
        logger.info(self.train_data.shape)
        logger.info(self.train_labels.shape)
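The per-sample ranking above uses focal loss from TensorFlow Addons, which down-weights well-classified examples, so the largest values mark the "hard" samples. A standalone call on toy tensors:

import tensorflow as tf
import tensorflow_addons as tfa

y_true = tf.constant([[1.0], [0.0]])
y_pred = tf.constant([[0.9], [0.4]])  # first sample easy, second harder
loss = tfa.losses.sigmoid_focal_crossentropy(y_true, y_pred)
print(loss.numpy())  # the second (harder) sample gets the larger loss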
Example #11
    def on_epoch_end(self):

        self.counter += 1
        if self.counter == self.epochs:
            logger.info("return")
            return
        predictions = self.model.predict(self.train_data)
        y_true = None
        y_pred = None
        if self.train_labels.shape[1] > 1:
            y_true = np.argmax(self.train_labels, axis=1)
            y_pred = np.argmax(predictions, axis=1)
        else:
            y_true = self.train_labels
            y_pred = np.where(predictions > 0.5, 1, 0)

        report = classification_report(y_true=y_true,
                                       y_pred=y_pred,
                                       output_dict=True)
        if self.counter == self.epochs - 1:
            logger.info(classification_report(y_true=y_true, y_pred=y_pred))

        classesCounts = np.unique(y_true, return_counts=True)[1]
        averageClassSize = math.ceil(np.mean(classesCounts))

        totalsScores = [
            report[label]['f1-score'] for label in report
            if label not in ["accuracy", "macro avg", "weighted avg"]
        ]
        for ix, s in enumerate(totalsScores):
            logger.info("{} : {}".format(ix, s))

        # Each class's resampling budget is proportional to its error
        # (1 - f1); f1ScoreTotal is the mean error across classes.
        f1ScoreTotal = np.sum([1 - s for s in totalsScores]) / len(classesCounts)
        updatedSampleSize = {}
        for label in report:
            if label not in ["accuracy", "macro avg", "weighted avg"]:
                updatedSampleSize[int(label)] = int(
                    ((1 - report[label]['f1-score']) / f1ScoreTotal) *
                    averageClassSize)
        logger.info(updatedSampleSize)
        logger.info("mean class size: {}".format(averageClassSize))
        classes, classesCount = np.unique(y_true, return_counts=True)
        logger.info(classesCount)

        for classId, classCount in zip(classes, classesCount):
            if classCount > updatedSampleSize[classId]:
                logger.info("Undersample class {}".format(classId))
                amountToRemove = classCount - updatedSampleSize[classId]
                classIndexes = np.where(y_true == classId)[0].tolist()
                indexesToRemove = random.sample(classIndexes, amountToRemove)
                self.train_data = np.delete(self.train_data, indexesToRemove,
                                            0)
                self.train_labels = np.delete(self.train_labels,
                                              indexesToRemove, 0)

            elif classCount < updatedSampleSize[classId]:
                logger.info("Oversample class {}".format(classId))
                amountToCopy = updatedSampleSize[classId] - classCount
                classIndexes = np.where(y_true == classId)[0].tolist()
                indexesToCopy = random.choices(classIndexes, k=amountToCopy)
                copiedDataset = np.copy(self.train_data[indexesToCopy, ])
                copiedLabels = np.copy(self.train_labels[indexesToCopy, ])
                self.train_data = np.concatenate(
                    (self.train_data, copiedDataset))
                self.train_labels = np.concatenate(
                    (self.train_labels, copiedLabels))

            # Refresh y_true so the next class's indexes are computed
            # against the resampled arrays.
            if self.train_labels.shape[1] > 1:
                y_true = np.argmax(self.train_labels, axis=1)
            else:
                y_true = self.train_labels
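The target sizes can be checked by hand: each class gets `((1 - f1_c) / F) * avg`, where `F` is the mean of `(1 - f1_c)` over all classes, so poorly classified classes receive more than the average class size. A numeric check with fabricated scores:

import math
import numpy as np

f1 = {0: 0.9, 1: 0.5}                  # per-class f1-scores (made up)
counts = np.array([80, 20])
avg = math.ceil(np.mean(counts))       # 50
F = sum(1 - s for s in f1.values()) / len(counts)   # (0.1 + 0.5) / 2 = 0.3
targets = {c: int(((1 - s) / F) * avg) for c, s in f1.items()}
print(targets)  # {0: 16, 1: 83} -> undersample class 0, oversample class 1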
Example #12
    parser.add_argument('--production', action='store_true')
    parser.add_argument('--docker', action='store_true')
    args = parser.parse_args()

    if args.production:
        os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.PROD_ENVIRONMENT
    elif args.docker:
        os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.DOCKER_ENVIRONMENT
    else:
        os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.DEVELOP_ENVIRONMENT

    from config.config import conf
    from config.Routes import Routes
    from logger.Logger import logger

    routes = Routes()
    api = routes.api

    from wsgiref import simple_server

    httpd = simple_server.make_server(conf.SINFONIER_API_HOST,
                                      conf.SINFONIER_API_PORT, api)
    logger.info('Server up! running in ' + conf.SINFONIER_API_HOST + ':' +
                str(conf.SINFONIER_API_PORT))
    logger.info(os.environ[EnvConst.SINFONIER_ENV_KEY].upper() + ' mode')
    httpd.serve_forever()
else:
    from utils.SinfonierConstants import Environment
    os.environ[Environment.SINFONIER_ENV_KEY] = Environment.PROD_ENVIRONMENT
    app = running_server()
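`wsgiref.simple_server` is the standard library's single-threaded reference server, fine for development but not meant for production traffic. The call pattern in isolation, with a trivial WSGI app standing in for the API:

from wsgiref.simple_server import make_server


def app(environ, start_response):
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [b'Server up!']

httpd = make_server('127.0.0.1', 8000, app)
httpd.serve_forever()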
Example #13
    def log(self):
        logger.info(self.stdout)
        if self.stderr:
            logger.error(self.stderr)