Example #1
    def __init__(self,
                 pipeline_dir,
                 prepared_dir,
                 classifier_dir,
                 pretrained,
                 normalizer_dir,
                 model_yaml_path,
                 encoder_level='char',
                 decoder_level='char',
                 onmt_dir='./OpenNMT-py',
                 language='en'):
        """
        pretrained =  None to disable the classifier
        model_yaml_path = None to use Rule-based normalizer
        """
        self.pipeline_dir = pipeline_dir
        self.prepared_dir = prepared_dir
        self.classifier_dir = classifier_dir
        self.pretrained = pretrained
        self.normalizer_dir = normalizer_dir
        self.encoder_level = encoder_level
        self.decoder_level = decoder_level
        self.onmt_dir = onmt_dir

        check_folder(self.pipeline_dir)
        check_folder(self.pipeline_dir + '/tmp')
        self.Classifier = Classifier(pretrained, prepared_dir, classifier_dir)
        self.Normalizer = Normalizer(model_yaml_path,
                                     prepared_dir,
                                     normalizer_dir,
                                     norm_only=not pretrained,
                                     onmt_dir=onmt_dir,
                                     encoder_level=encoder_level,
                                     decoder_level=decoder_level,
                                     language=language)
Example #2
    def __init__(self):
        classifier = Classifier()
        classifier.Build()

        # Trainer, Evaluator
        print("Reading Training set...")
        # self.setdata('something')
        self.trainer = Trainer(classifier)

        self.trainEvaluator = Evaluator("train",
                                        dataSettings.PATH_TO_TRAIN_SET_CATELOG,
                                        classifier)
        print("\t Done.\n")

        print("Reading Validation set...")
        self.validationEvaluator = Evaluator(
            "validation", dataSettings.PATH_TO_VAL_SET_CATELOG, classifier)
        print("\t Done.\n")

        print("Reading Test set...")
        self.testEvaluator = Evaluator("test",
                                       dataSettings.PATH_TO_TEST_SET_CATELOG,
                                       classifier)
        print("\t Done.\n")

        # Summary
        summaryOp = tf.summary.merge_all()
        self.trainer.SetMergedSummaryOp(summaryOp)
        self.trainEvaluator.SetMergedSummaryOp(summaryOp)
        self.validationEvaluator.SetMergedSummaryOp(summaryOp)
        self.testEvaluator.SetMergedSummaryOp(summaryOp)
        self.bestThreshold = None

        # Time
        self._startTrainEpochTime = time.time()
        self._trainCountInOneEpoch = 0

        # Saver
        self.modelSaver = tf.train.Saver(
            max_to_keep=trainSettings.MAX_TRAINING_SAVE_MODEL)

        # Session
        self.session = tf.Session()
        init = tf.global_variables_initializer()
        self.session.run(init)

        self.trainer.SetGraph(self.session.graph)
        self.validationEvaluator.SetGraph(self.session.graph)
Example #3
def detect(transport_type: int,
           route_name: str,
           connector: Connector,
           logger=None):
    # load the classifier
    classifier = Classifier.load(transport_type=transport_type,
                                 connector=connector,
                                 logger=logger)

    # load the route information
    route_df = connector.getRouteData(route_name)
    data = route_df[[
        "ON_DAY",
        "SUM_VALUES",
        "MAX(AGE1_SEX0)",
        "MAX(AGE2_SEX0)",
        "MAX(AGE3_SEX0)",
        "MAX(AGE1_SEX1)",
        "MAX(AGE2_SEX1)",
        "MAX(AGE3_SEX1)",
    ]].values
    route_nums = route_df[["ROUTE_NUMBER"]].values[:, 0]

    # get the class labels
    labels = classifier.classify(data)

    # extract the anomalous data
    anomaly_df = classifier.getAnomalys(data, route_nums, labels)

    if logger is not None:
        logger.info('Route %s: detected %d anomalies in data',
                    route_name, len(anomaly_df))
    return anomaly_df
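
A hedged usage sketch for detect, assuming a Connector configured as in Example #8; the config path, transport type, and route name below are all hypothetical:

import json
import logging

from src.DBConnector import Connector

# Hypothetical connection config and route; adjust to your database.
with open('./connection.config') as config_json:
    connector = Connector(json.load(config_json))

logger = logging.getLogger('anomaly-detection')
anomaly_df = detect(transport_type=1,
                    route_name='route_42',
                    connector=connector,
                    logger=logger)
print(anomaly_df.head())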
Example #4
    def classify(self, entry):
        # Tally one vote per tree in the forest.
        votes = {}
        for tree in self.Forest:
            prediction = Classifier(entry, tree)
            votes[prediction] = votes.get(prediction, 0) + 1

        # Return the most voted prediction.
        best_votes = 0
        best_prediction = None
        for prediction, count in votes.items():
            if count > best_votes:
                best_votes = count
                best_prediction = prediction
        return best_prediction
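
For reference, the same majority vote can be written more compactly with collections.Counter; a sketch under the same assumptions about self.Forest and Classifier (ties may resolve differently, since most_common breaks them by insertion order):

from collections import Counter

def classify(self, entry):
    # Count one vote per tree, then return the most common prediction.
    votes = Counter(Classifier(entry, tree) for tree in self.Forest)
    if not votes:
        return None
    return votes.most_common(1)[0][0]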
Example #5
    def btnDiagnose_clicked(self):
        ip = ImageProcessing()
        ip.main()
        bve = BloodVesselsExtract()
        bve.main()
        mex = MaExtract()
        mex.main()
        hd = HemorrhageDetection()
        hd.main()
        cl = Classifier()
        cl.training()

        df = pd.read_csv('records.csv')
        status = df["Diabetic_retinopathy_status"].values[0]
        grade = str(df["Diabetic_retinopathy_grade"].values[0])
        severity = df["Severity"].values[0]
        # print(status)
        # print(grade)
        # print(severity)
        self.txtDr.setText(status)
        self.txtGrade.setText(grade)
        self.txtSeverity.setText(severity)
        os.remove("records.csv")
Example #6
class Pipeline:
    def __init__(self,
                 pipeline_dir,
                 prepared_dir,
                 classifier_dir,
                 pretrained,
                 normalizer_dir,
                 model_yaml_path,
                 encoder_level='char',
                 decoder_level='char',
                 onmt_dir='./OpenNMT-py',
                 language='en'):
        """
        pretrained =  None to disable the classifier
        model_yaml_path = None to use Rule-based normalizer
        """
        self.pipeline_dir = pipeline_dir
        self.prepared_dir = prepared_dir
        self.classifier_dir = classifier_dir
        self.pretrained = pretrained
        self.normalizer_dir = normalizer_dir
        self.encoder_level = encoder_level
        self.decoder_level = decoder_level
        self.onmt_dir = onmt_dir

        check_folder(self.pipeline_dir)
        check_folder(self.pipeline_dir + '/tmp')
        self.Classifier = Classifier(pretrained, prepared_dir, classifier_dir)
        self.Normalizer = Normalizer(model_yaml_path,
                                     prepared_dir,
                                     normalizer_dir,
                                     norm_only=not pretrained,
                                     onmt_dir=onmt_dir,
                                     encoder_level=encoder_level,
                                     decoder_level=decoder_level,
                                     language=language)

    def train(self,
              num_train_epochs=10,
              learning_rate=1e-5,
              weight_decay=1e-2,
              per_device_train_batch_size=16,
              per_device_eval_batch_size=16,
              mode='pipeline'):
        if mode == 'pipeline':
            print("---pipeline mode---")
            print("Start training classifier...")
            self.Classifier.train(num_train_epochs, learning_rate,
                                  weight_decay, per_device_train_batch_size,
                                  per_device_eval_batch_size)
            print("Start training normalizer...")
            self.Normalizer.train()

        elif mode == 'normalizer':
            print("---normalizer mode---")
            print("Start training normalizer...")
            self.Normalizer.train()

        elif mode == 'classifier':
            print("---classifier mode---")
            print("Start training classifier...")
            self.Classifier.train(num_train_epochs, learning_rate,
                                  weight_decay, per_device_train_batch_size,
                                  per_device_eval_batch_size)
        else:
            print("Mode Error! Skip all training!")

    def eval(self, key='test', normalizer_step=-1, use_gpu=True):
        # make test data as sentence
        df = read_sentence_from_csv("{}/{}.csv".format(self.prepared_dir, key))
        input_path = '{}/{}_input.txt'.format(self.pipeline_dir, key)
        target_path = '{}/{}_target.txt'.format(self.pipeline_dir, key)
        cls_path = '{}/{}_classified.csv'.format(self.pipeline_dir, key)
        tbn_path = '{}/{}_TBNormed.txt'.format(self.pipeline_dir, key)
        norm_path = '{}/{}_normed.txt'.format(self.pipeline_dir, key)
        output_path = '{}/{}_output.txt'.format(self.pipeline_dir, key)

        df[['src_token']].to_csv(input_path, header=False, index=False)
        df[['tgt_token']].to_csv(target_path, header=False, index=False)

        print("Start evaluating classifier...")
        df_cls = self.Classifier.predict(input_path, cls_path)
        df_TBNorm = df_cls[df_cls.tag != 'O']
        df_TBNorm['token'].to_csv(tbn_path, header=False, index=False)

        print("Start evaluating normalizer...")
        df_nor = self.Normalizer.predict(input_path=tbn_path,
                                         output_path=norm_path,
                                         normalizer_step=normalizer_step,
                                         use_gpu=use_gpu)

        df_cls['pred'] = df_cls['token']
        id_TBNorm = df_cls.index[df_cls['tag'] == 'B'].tolist()
        df_cls.loc[id_TBNorm, 'pred'] = df_nor['pred'].tolist()
        result = df_cls.groupby(['sentence_id']).agg({'pred': ' '.join})
        result[['pred']].to_csv(output_path, header=False, index=False)

        command_wer = 'wer {ref_path} {hyp_path}'.format(ref_path=target_path,
                                                         hyp_path=output_path)
        print(command_wer)
        print(os.popen(command_wer).read())

    def predict(self,
                input_path,
                output_path,
                normalizer_step=-1,
                use_gpu=True):
        tmp_dir = self.pipeline_dir + '/tmp'
        key = 'tmp'
        cls_path = '{}/{}_classified.csv'.format(tmp_dir, key)
        tbn_path = '{}/{}_TBNormed.txt'.format(tmp_dir, key)
        norm_path = '{}/{}_normed.txt'.format(tmp_dir, key)

        print("Start predicting classifier...")
        df_cls = self.Classifier.predict(input_path, cls_path)
        df_TBNorm = df_cls[df_cls.tag != 'O']
        df_TBNorm['token'].to_csv(tbn_path, header=False, index=False)

        print("Start predicting normalizer...")
        df_nor = self.Normalizer.predict(input_path=tbn_path,
                                         output_path=norm_path,
                                         normalizer_step=normalizer_step,
                                         use_gpu=use_gpu)

        df_cls['pred'] = df_cls['token']
        id_TBNorm = df_cls.index[df_cls['tag'] == 'B'].tolist()
        df_cls.loc[id_TBNorm, 'pred'] = df_nor['pred'].tolist()
        result = df_cls.groupby(['sentence_id']).agg({'pred': ' '.join})
        result[['pred']].to_csv(output_path, header=False, index=False)
        print("Prediction saved to: ", output_path)
Example #7
              floatPrecision.format(videoAccuracy_), "     duration:",
              "{0:.2f}".format(duration_) + "(s)\n")
    else:
        print("\t     loss:", floatPrecision.format(loss_),
              "     frame accuracy:", floatPrecision.format(frameAccuracy_),
              "     given frame threshold:",
              threshold_, "     video accuracy:",
              floatPrecision.format(videoAccuracy_), "     duration:",
              "{0:.2f}".format(duration_) + "(s)\n")


if __name__ == '__main__':
    numberOfArguments = len(sys.argv)
    if numberOfArguments in (2, 3):
        PATH_TO_DATA_SET_CATELOG = sys.argv[1]
        classifier = Classifier()
        classifier.Build()
        evaluator = Evaluator("evaluate", PATH_TO_DATA_SET_CATELOG, classifier)

        with tf.Session() as session:
            init = tf.global_variables_initializer()
            session.run(init)

            print("Load Model from: ", evalSettings.PATH_TO_MODEL_CHECKPOINTS)
            modelLoader = tf.train.Saver()
            modelLoader.restore(session,
                                evalSettings.PATH_TO_MODEL_CHECKPOINTS)

            startEvaluateTime = time.time()
            if numberOfArguments == 2:
                print("Start evaluate: ", PATH_TO_DATA_SET_CATELOG,
Example #8
import json

from src.Classifier import Classifier
from src.DBConnector import Connector

with open('./connection.config') as config_json:
    config = json.load(config_json)
connector = Connector(config)

classifier = Classifier(1, connector, './samples/transport_1.csv')
Example #9
import os
import sys
lib_path = os.path.abspath(os.path.join(sys.path[0], '..'))
sys.path.append(lib_path)
from src.Classifier import Classifier
from src.PreProcessing import PreProcessing
import numpy as np
import matplotlib.pyplot as plt
import time
from multiprocessing import Queue

classifier_classes = Classifier.classifier_dict()
abspath = os.path.abspath(sys.path[0])
CONFIG = {
    'frame size': 512,
    'overlap': 128,
    'sample rate': 44100,
    'is training': True,
    'is streaming': True,
    'continuous stream': False,
    'data list': '../DataSet/DataList_all.txt',
    'classifier': ['SVM', 'KNN'],
    'argumentation': True,
    'debug tool': False,
    'error stat': False
}

frame_count = np.zeros((101,))
print(frame_count.shape, frame_count)

Example #10
from src.LocalBinaryPatternUtil.Params import Params
from src.PatternBuilder import PatternBuilder
from src.Classifier import Classifier
import src.LogoExtractor as extractor
import os

where = os.path.dirname(os.path.realpath(__file__))

logoDictionary = {'ChangAn': 1, 'VolksWagen': 2, 'JiangHuai': 3, 'JinBei': 4,
                  'KaiRui': 5, 'QiRui': 6, 'Hyundai': 7}
sourcePath = where + "/src/resources/logo_template"
histogramPath = where + "/src/resources/lbph.dat"
params = Params(1, 6, 4, 4)

# TODO ::: make input image configurable. later...
logo = extractor.logoExtraction(where + "/src/resources/testImages/volkswagen1.jpg")

patternBuilder = PatternBuilder(params, logoDictionary, sourcePath, histogramPath)
patternBuilder.buildAll()
patternBuilder.cache.saveHistogramData()

classifier = Classifier(patternBuilder.cache, params, logoDictionary)
classifier.guessLogo(logo)
Example #11
from src.Type import Type
from src.Classifier import Classifier
from src.utils import run_experiment

if __name__ == '__main__':
    nb_classifier = Classifier(class_type=Type.NB, feature_cut_off=0)
    svm_classifier = Classifier(class_type=Type.SVM, feature_cut_off=0)

    print("unigrams")
    # unigrams
    run_experiment(nb_classifier, svm_classifier, "unigrams")

    # unigrams + stemming
    nb_classifier.stemming = True
    svm_classifier.stemming = True
    run_experiment(nb_classifier, svm_classifier, "unigrams and stemming")

    # unigrams + stemming + frequency
    nb_classifier.frequency = True
    svm_classifier.frequency = True
    run_experiment(nb_classifier, svm_classifier,
                   "unigrams, stemming and frequency")

    # unigrams + frequency
    nb_classifier.stemming = False
    svm_classifier.stemming = False
    run_experiment(nb_classifier, svm_classifier, "unigrams and frequency")

    print("bigrams")
    # bigrams + stemming
    nb_classifier.stemming = True
Example #12
import pandas
from src.Classifier import Classifier

classifier = Classifier(1, None, "./samples/transport_1.csv")

print(classifier.accuracy)