def main():
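    # Interactive loop: `classification` is a two-element score pair;
    # index 0 is the Telugu score and index 1 the English score.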
    clf = Classify()
    while True:
        word = input('Word: ')
        classification = clf.classifyWord(word)
        if classification[0] > classification[1]:
            print("Telugu, with prob matrix")
        else:
            print("English, with prob matrix")
        print(classification)
Example #2
 def activity(self):
     self.output.setText("请稍等几秒钟......")  # "Please wait a few seconds..."
     sentence = self.input.text()
     print(self.input.text())
     args = parse_args()
     print("ok")
     IE = TripleIE(sentence, args.ltp, args.clean)
     out = IE.run()
     if re.search(r"句子结构不完整", out) is None:  # "sentence structure incomplete"
         CL = Classify(sentence, out)
         out += "句子类型:" + CL.SentenceType()  # append "sentence type:" + label
     self.output.setText(out)
    def __init__(self, type):

        if type == 'train':
            self.c = Classify(train=False, type='train')
        else:
            self.c = Classify(train=False, type='valid')
        self.c.load_data()
        self.labels = self.c.temp

        self.tgt_sentences = self.c.sentences
        self.src_sentences = self.c.raw_sentence

        self.trainData = []
    def selectMode(self, configDict, automatic):

        mode = ""

        if automatic:

            mode = getattr(self, "mode")

        else:

            while not mode:

                mode = input(
                    "Enter mode. 'prepare', 'classify', 'train' or 'predict'\n"
                )
                if mode in ("prepare", "classify", "train", "predict"):
                    print("Correct mode chosen.")
                else:
                    print("Wrong mode!")
                    mode = ""

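        # Dispatch on the chosen mode: each branch imports its handler lazily
        # and constructs it from the matching section of configDict.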
        if mode == "classify":

            from classify import Classify

            classify = Classify(**dict(configDict["Classify"]))
            classify.classifyMultipleImages()

        elif mode == "train":

            from trainModel import TrainModel
            train = TrainModel(**dict(configDict["Train"]))
            train.createModel()

        elif mode == "predict":

            from predictService import PredictService
            predict = PredictService(**dict(configDict["Predict"]))
            predict.downloadImage()
            predict.predict()

        elif mode == "prepare":

            args = {
                "pathToConfig": configDict["Prepare"]["configPath"],
            }

            self.GlobalServices(**args).prepareFolders(
                input(
                    "Enter root folder name. (Contains default folder structure.)\n"
                ))
Example #5
    def classify(self, options):
        """Determine taxonomic classification of genomes."""

        check_dir_exists(options.align_dir)
        make_sure_path_exists(options.out_dir)

        genomes = self._genomes_to_process(options.genome_dir,
                                           options.batchfile,
                                           options.extension)

        classify = Classify(options.cpus)
        classify.run(genomes, options.align_dir, options.out_dir,
                     options.prefix, options.debug)

        self.logger.info('Done.')
Example #6
 def main():
     print "Creating Train File..."
     Data.CreateDataFile("mails-train", "mails-train.txt")
     print "Initializing Train File..."
     trainingSet = Data.LoadFile("mails-train.txt")
     print "Creating Test File..."
     Data.CreateDataFile("mails-test", "mails-test.txt")
     print "Initializing Test File..."
     testSet = Data.LoadFile("mails-test.txt")
     print "Extracting Classes.."
     classes = Train.ExtractClasses(trainingSet)
     print "Training NBC..."
     vocabulary, prior, condprob = Train.TrainMultinomialNaiveBayes(
         classes, trainingSet)
     print "Testing Accuracy..."
     percentage = Test.Accuracy(classes, vocabulary, prior, condprob,
                                testSet)
     print "The percentage of correct predictions is ", 100 * percentage, "percent."
     print "Get Random Document..."
     testDocument = Data.GetDocument(testSet)
     print "Do A Time Measurement of the Application of the NBC..."
     print "The time is took to do a single application of the NBC on a document is", Test.TimeMeasure(
         classes, vocabulary, prior, condprob, testDocument), "seconds."
     print "Applying NBC on Document..."
     topClass, score = Classify.ApplyMultinomialNaiveBayes(
         classes, vocabulary, prior, condprob, testDocument['document'])
Example #7
def main(test_file, threshold):
    #extract pdfs
    train = Train(1)
    #print train.tot_mean
    #print train.tot_std
    #compute probability for test data
    ai = Classify(test_file, train, threshold)
    return 0
Example #8
 def TimeMeasure(classes, vocabulary, prior, condprob, document):
     # Takes a time measurement before and after running ApplyMultinomialNBC to see
     # how long it takes to execute on the given document.
     start = time.time()
     Classify.ApplyMultinomialNaiveBayes(classes, vocabulary, prior,
                                         condprob, document)
     end = time.time()
     return end - start
Example #9
def make_forest(n=10):
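    # Build n independent classifiers, each fitted on a 2000-example load of
    # the training data, and return them as a simple forest (list).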

    forest = []
    for i in range(n):
        train = load.load_train(2000)
        train_labels = np.argmax(train[1], axis=1)
        classify = Classify(6, train[0], train_labels)
        forest.append(classify)

    return forest
 def predict_accident(self):
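     # Walk every buffered frame path, classify it, and on the first
     # accident-class hit copy the frame to ../accident/ and flip the DB flags;
     # each processed path is then deleted from the buffer table and from disk.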
     insert_into_DB = 1
     db = DBConnection()
     conn = db.get_connection()
     mycursor = conn.cursor()
     mycursor.execute("SELECT path FROM buffer")
     buffer_items = mycursor.fetchall()
     for path_row in buffer_items:
         path = path_row[0]
         clf = Classify(path)
         class_name, percentage = clf.classify_image()
         if (class_name[0] == 'a'
                 or class_name[0] == 'A') and insert_into_DB == 1:
             insert_into_DB = 0
             print('accident detected')
             Camera_id = 'CAM001'
             db1 = DBConnection()
             conn1 = db1.get_connection()
             mycursor1 = conn1.cursor()
             mycursor1.execute("SELECT count(path) FROM Accident")
             count_row = mycursor1.fetchone()
             new_path = '../accident/Accident' + str(count_row[0]) + '.jpg'
             copyfile(path, new_path)
             date_time = datetime.datetime.now().strftime(
                 "%Y-%m-%d %H:%M:%S")
             timestamp = time.time()
             sql1 = "insert into Accident(Camera_id,path,date_time,timestampAcc) values(%s,%s,%s,%s);"
             mycursor1.execute(
                 sql1, [Camera_id, new_path, date_time,
                        int(timestamp)])
             conn1.commit()
             mycursor1.execute(
                 "UPDATE flag set flag_var = 1 where flag_key = 1;")
             conn1.commit()
             mycursor1.execute(
                 "UPDATE smbool set continue_buffer = 0 where flag_var = 0")
             conn1.commit()
         if insert_into_DB == 0:
             print('skipping database entry')
         sql = "DELETE FROM buffer WHERE path = %s"
         mycursor.execute(sql, [path])
         conn.commit()
         os.remove(path)
 def generate_model_output(self):
     self.classifier = Classify(self.weights,
                                self.root_dir,
                                self.emails_threshold,
                                self.results_size,
                                results_dir=self.result_path_out,
                                serial_path=self.model_path_out,
                                memlog_freq=self.memlog_classify_frequency,
                                debug_training=self.debug_training,
                                filterRecipients=self.filter_recipients,
                                recipientTargetFile=self.recipients)
     logs.Watchdog.reset()
     self.classifier.generate_training()
     logs.Watchdog.reset()
     self.classifier.train_clf()
     logs.Watchdog.reset()
     self.classifier.cross_validate()
     logs.Watchdog.reset()
     self.classifier.test_and_report()
     logs.Watchdog.reset()
Example #12
    def testSingleFile(self):
        f = askopenfile(mode='r', defaultextension=".txt")
        if f is None:  # askopenfile returns `None` if the dialog is closed with "cancel".
            return
        lines = f.read()
        bagOfWords = re.split(' ',lines)
        singleFile = Data.Normalize(bagOfWords)

        print "Loaded."
        print "Calculating..."
        topClass, score = Classify.ApplyMultinomialNaiveBayes(self.classes, self.vocabulary, self.prior, self.condprob, singleFile)
        print "This document belongs to", topClass
        print "Done."
        f.close() # `()` was missing.
Example #13
 def Accuracy(classes, vocabulary, prior, condprob, dataset):
     # Runs ApplyMultinomialNBC on a series of documents whose class we already know.
     # Counts how often the prediction matches the true value.
     # The returned value is a fraction between 0 and 1 indicating what share of the predictions were correct.
     correct = 0
     totaal = 0
     count = 0
     for c in dataset:
         for d in dataset[c]:
             topClass, score = Classify.ApplyMultinomialNaiveBayes(
                 classes, vocabulary, prior, condprob, dataset[c][d])
             if topClass == c:
                 correct += 1
             totaal += 1
     print "The amount of total tested documents is", totaal
     print "The amount of correct predictions are", correct
     return correct / totaal
 def generate_model_output(self):
     self.classifier = Classify(self.weights,
                                self.root_dir,
                                self.emails_threshold,
                                self.results_size,
                                results_dir=self.result_path_out,
                                serial_path=self.model_path_out,
                                memlog_freq=self.memlog_classify_frequency,
                                debug_training=self.debug_training)
     logs.Watchdog.reset()
     self.classifier.generate_training()
     logs.Watchdog.reset()
     self.classifier.train_clf()
     logs.Watchdog.reset()
     self.classifier.cross_validate()
     logs.Watchdog.reset()
     self.classifier.test_and_report()
     logs.Watchdog.reset()
Example #15
    def blind_test(feature_sets):
        classifier = Classify(init=False)
        classifier.network_filename = 'hl1__sizes-200-__acfn-rel-__dr-0.2-0.5-__lr0.1__mb100__mom0.9__seed1.hdf5'
        classifier.initialize_network()

        labels = []
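            # Rescale each flat 784-pixel image to [0, 1] and reshape it into a
            # 28x28 grid before asking the network for a label.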
        for image in feature_sets:
            image_1d = [value / 255.0 for value in image]  # list so it can be indexed below
            image_2d = []
            for row_idx in range(28):
                image_2d.append([])
                for col_idx in range(28):
                    image_2d[row_idx].append(image_1d[row_idx * 28 + col_idx])

            label = classifier.classify(image_2d)
            labels.append(label)
        return labels
import argparse

from ie import TripleIE
from classify import Classify


def parse_args():
    parser = argparse.ArgumentParser('TripleIE')

    parser.add_argument('--ltp',
                        type=str,
                        default=r'E:\python\ltp_data_v3.4.0',
                        help='the path to LTP model')
    parser.add_argument('--clean',
                        action='store_true',
                        help='output the clean relation(no tips)')

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    # sentence = "国务院总理李克强积极地快乐地调研美丽的上海"  # alternative test sentence
    sentence = "什么时候放假呢"  # test sentence: "When is the vacation?"

    IE = TripleIE(sentence, args.ltp, args.clean)
    result = IE.run()
    if result != "句子结构不完整":  # "sentence structure incomplete"
        CL = Classify(sentence, result)
        result += "句子类型:" + CL.SentenceType()  # append "sentence type:" + label

    print(result)
Example #17
from classify import Classify
from flask import request, Flask

app = Flask(__name__)
imgdir = './images/'

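# Minimal HTTP wrapper: POST to /api/ with the image URL in a 'url' header and
# the shared Classify instance returns its prediction as a string.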
@app.route('/api/', methods=["POST"])
def classify_img():
    url = request.headers.get('url')
    return str(c.classify(url))

if __name__ == "__main__":
    c = Classify()
    app.run(debug=True)
class PhishDetector(object):

    def __init__(self):
        #Flag Configurations
        self.generate_data_matrix = False
        self.generate_test_matrix = False
        self.generate_model = False
        self.classify = False
        self.config_path = 'config.yaml'

        #Config File Configurations
        self.root_dir = None
        self.filename = None
        self.weights = None
        self.sender_profile_percentage = 0
        self.data_matrix_percentage = 0
        self.test_matrix_percentage = 0
        self.emails_threshold = 1000
        self.results_size = 10
        self.model_path_out = './model'
        self.result_path_out = './summary'
        self.detectors = None
        self.parallel = None

        #Generator and Classifier
        self.classifier = None

        self.parse_config()
        self.parse_args()


    def parse_args(self):
        """
        Parses command line arguments.
        """
        parser = argparse.ArgumentParser(description='Manage spear phishing detector.')
        parser.add_argument('--all',
                            action='store_true',
                            help=('Generate and serialize data matrix, test matrix, and ML model, then run ML model on test matrix'))
        parser.add_argument('--gen_all',
                            action='store_true',
                            help=('Generate and serialize data matrix, test matrix, and ML model'))
        parser.add_argument('--gen_data',
                            action='store_true',
                            help=('Generate and serialize data matrix'))
        parser.add_argument('--gen_test',
                            action='store_true',
                            help=('Generate and serialize test matrix'))
        parser.add_argument('--gen_model',
                            action='store_true',
                            help=('Generate and serialize ML model'))
        parser.add_argument('--classify',
                            action='store_true',
                            help=('Run ML model on test matrix'))
        parser.add_argument('--debug_training',
                            action='store_true',
                            help=('Debug the training step of the pipeline.'))
        parser.add_argument('--mbox',
                            action='store_true',
                            help=('Use emails from mbox rather than pcaps'))
        
        args = parser.parse_args()

        run = False
        self.debug_training = False
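        # Each flag enables one or more pipeline stages; `run` records whether
        # any stage was requested so an empty invocation can be rejected below.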
        if args.all:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            self.classify = True
            run = True
        if args.gen_all:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            run = True
        if args.gen_data:
            self.generate_data_matrix = True
            run = True
        if args.gen_test:
            self.generate_test_matrix = True
            run = True
        if args.gen_model:
            self.generate_model = True
            run = True
        if args.classify:
            self.classify = True
            run = True
        if args.debug_training:
            self.generate_data_matrix = True
            self.generate_test_matrix = True
            self.generate_model = True
            self.classify = True
            self.debug_training = True
            run = True


        if not run:
            parser.error('You must run with at least one flag')

    def parse_config(self):
        """
        Parses configuration file. Assumes configuration is in same directory as this script.
        """
        try:
            stream = open(self.config_path, 'r')
        except IOError:
            progress_logger.exception("Could not find yaml configuration file.")
            raise

        config = yaml.load(stream)
        
        expected_config_keys = [
            'root_dir',
            'regular_filename',
            'phish_filename',
            'sender_profile_percentage',
            'data_matrix_percentage',
            'test_matrix_percentage',
            'use_name_in_from',
            'model_path_out',
            'result_path_out',
            'weights',
            'detectors',
            'emails_threshold',
            'batch_threading_size',
            'offline',
            'results_size',
            'parallel',
            'num_threads',
            'logging_interval',
            'memlog_gen_features_frequency',
            'memlog_classify_frequency'
        ]

        try:
            for key in expected_config_keys:
                setattr(self, key, config[key])
        except KeyError:
            progress_logger.exception("Configuration file missing entry")
            raise

        detectors = []
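        # Detectors enabled in the config (value 1) are resolved by name to
        # attributes of the module aliased as `fc`.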
        for detector, val in self.detectors.items():
            if val == 1:
                detectors.append(getattr(globals()['fc'], detector))

        self.detectors = detectors
        self.root_dir = os.path.abspath(self.root_dir)
        Lookup.initialize(offline=self.offline)

    def prep_features(self, directory):   
        regular_path = os.path.join(directory, self.regular_filename)
        phish_path = os.path.join(directory, self.phish_filename)

        feature_generator = FeatureGenerator(directory,
                                             regular_path,
                                             phish_path,
                                             self.sender_profile_percentage,
                                             self.data_matrix_percentage,
                                             self.test_matrix_percentage,
                                             self.detectors
                                            )

        feature_generator.do_generate_data_matrix = self.generate_data_matrix
        feature_generator.do_generate_test_matrix = self.generate_test_matrix
        return feature_generator


    def generate_features(self):
        if self.use_name_in_from != 0:
            Detector.USE_NAME = True

        dir_to_generate = []
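        # Collect every directory that contains the files required for the
        # requested matrices, then generate features either in parallel batches
        # (via worker pools) or serially with periodic progress/memory logging.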

        progress_logger.info('Starting directory aggregation in feature generation.')
        start_time = time.time()
        for dirpath, dirnames, filenames in os.walk(self.root_dir):
            if ((self.generate_data_matrix and self.regular_filename in filenames and self.phish_filename in filenames)
                or (self.generate_test_matrix and self.regular_filename in filenames)):
                dir_to_generate.append(dirpath)
                logs.Watchdog.reset()
        end_time = time.time()
        min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60)
        progress_logger.info('Finished directory aggregation in feature generation in {} minutes, {} seconds'.format(min_elapsed, sec_elapsed))
        
        BATCH_SIZE = self.batch_threading_size
        if self.parallel:
            progress_logger.info('Starting feature generation with {} threads in parallel with batch size {}...'.format(self.num_threads, BATCH_SIZE))
            start_time = time.time()
            feature_generators = []
            for directory in dir_to_generate:
                feature_generator = self.prep_features(directory)
                feature_generators.append(feature_generator)
                if len(feature_generators) == BATCH_SIZE:
                    p = Pool(self.num_threads)
                    p.map(run_generator, feature_generators)
                    p.close()
                    p.join()
                    feature_generators = []
            if len(feature_generators) > 0:
                p = Pool(self.num_threads)
                p.map(run_generator, feature_generators)
                p.close()
                p.join()
            end_time = time.time()
            min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60)
            progress_logger.info('Finished feature generation in {} minutes, {} seconds.'.format(min_elapsed, sec_elapsed))
        else:
            progress_logger.info('Starting feature generation serially for {} directories'.format(len(dir_to_generate)))
            start_time = time.time()
            last_logged_time = start_time
            dir_count = 0
            end_of_last_memory_track = dt.datetime.now()
            for directory in dir_to_generate:
                dir_count += 1
                logs.context = {'feature gen': dir_count}
                curr_time = time.time()
                if (curr_time - last_logged_time) > self.logging_interval * 60:
                    progress_logger.info('Processing directory #{} of {}'.format(dir_count, len(dir_to_generate)))
                    progress_logger.info('Feature generation has run for {} minutes'.format(int((curr_time - start_time) / 60)))
                    last_logged_time = curr_time
                feature_generator = self.prep_features(directory)
                feature_generator.run()
                logs.Watchdog.reset()
                now = dt.datetime.now()
                time_elapsed = now - end_of_last_memory_track
                minutes_elapsed = time_elapsed.seconds / 60.0
                if minutes_elapsed > self.memlog_gen_features_frequency:
                    MemTracker.logMemory('After generating features for {}th sender'.format(dir_count))
                    end_of_last_memory_track = dt.datetime.now()
                logs.context = {}
            end_time = time.time()
            min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60)
            progress_logger.info('Finished feature generation in {} minutes, {} seconds.'.format(min_elapsed, sec_elapsed))

    def generate_model_output(self):
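        # Build the classifier, then run the full pipeline: generate training
        # data, fit, cross-validate, and test/report, resetting the log
        # watchdog between the long-running steps.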
        self.classifier = Classify(self.weights,
                                   self.root_dir,
                                   self.emails_threshold,
                                   self.results_size,
                                   results_dir=self.result_path_out,
                                   serial_path=self.model_path_out,
                                   memlog_freq=self.memlog_classify_frequency,
                                   debug_training=self.debug_training)
        logs.Watchdog.reset()
        self.classifier.generate_training()
        logs.Watchdog.reset()
        self.classifier.train_clf()
        logs.Watchdog.reset()
        self.classifier.cross_validate()
        logs.Watchdog.reset()
        self.classifier.test_and_report()
        logs.Watchdog.reset()


    def execute(self):
        detector_names = ', '.join([d.__name__ for d in self.detectors])
        progress_logger.info("Config settings: use_name_in_from={}, parallel={}, detectors={}".format(self.use_name_in_from, self.parallel, detector_names))

        start_time = time.time()
        MemTracker.initialize(memory_logger)
        logs.Watchdog.initialize()
        logs.context = {'phase': 'generate_features'}
        if self.generate_data_matrix or self.generate_test_matrix:
            self.generate_features()
        logs.context = {}
        MemTracker.logMemory("After generating features/Before generating model")
        logs.context = {'phase': 'generate_model_output'}
        if self.generate_model:
            self.generate_model_output()
        logs.context = {}
        MemTracker.logMemory("After generating model")
        end_time = time.time()
        min_elapsed, sec_elapsed = int((end_time - start_time) / 60), int((end_time - start_time) % 60)
        progress_logger.info("Phish Detector took {} minutes, {} seconds to run.".format(min_elapsed, sec_elapsed))
        logs.RateLimitedLog.flushall()
Example #19
from __future__ import division
import sys
import os
from classify import Classify
from db import Db

classifier = Classify()

def is_doctype_valid(doctype):
	return Db().get_words_count(doctype) > 0

def check_file(f):
	results = []
	for line in open(f, 'r').readlines():
		try:
			classifier.set_text(line)
			results += [classifier.execute()]
		except ValueError:
			pass
	
	return results

def check_dir(d):
	results = []
	for f in os.listdir(d):
		if f.endswith(".js"):
			results += check_file(os.path.join(d,f))

	return results

def show_results(results):
Example #20
import cv2
from classify import Classify
from CNN_model import CNNModel
from dataset_creation import CreateTrainData

CreateTrainData.createFrames(
    "frames")  #frames is a path where frames will be saved
CreateTrainData.dataToTrainAndTest()

CNNModel.runModel()
CNNModel.ModelResultsPlot("ggplot")

vs = cv2.VideoCapture('331.mp4')
Classify.classifyVideo(vs)
Example #21
def get_image_data():
    dat = request.form['javascript_data']
    saveToPNG(dat)
    pred = Classify('image2.png')
    return str(pred)
Example #22
airportList = [
    'ATL', 'LAX', 'ORD', 'DFW', 'DEN', 'JFK', 'SFO', 'LAS', 'SEA', 'CLT'
]
# airportList = ['LAX','BOS','ATL']
# airportList = ['ATL','LAX','ORD','DFW','DEN','JFK','SFO','LAS','SEA','CLT']#,  'EWR','MCO','PHX','MIA','IAH','BOS']#,'MSP','DTW','FLL','LGA']
ad = Preprocess(airportList=airportList)
# ad.parseData('2017.csv')

# create df for plotting before filtering out the airports
# ad.createplotdf()
# ad.initialPlots()

# filter by airport for the ML data
# ad.filterByAirport()
ad.createMLdf()

cl = Classify()

# tune the tree - choose depth = 10, minLeaf = 50
# cl.tuneTree_acc(depth=list(range(1,11)), minLeaf=[2,5,10,20,50,100])
# cl.tuneTree(depth=list(range(10,101,5)))
# cl.runTree(maxDepth=10, minLeaf=50, printRules=False)

# tune Logistic - choose C = 1
# cl.tuneLogistic()
# cl.runLogistic(C=1)

# tune SVM - choose C = 10
# cl.tuneSVM()
# cl.runSVM(C=10)

# tune NN - choose layers=(2,), alpha=0.01
import cv2
import numpy as np
import os

from classify import Classify

model = "face/face_lady_man.model"
labelbin = "face/face_lady_man"

_classify = Classify(model, labelbin)

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "Cascades/haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)
font = cv2.FONT_HERSHEY_SIMPLEX
# initiate id counter
id = 0
# names related to ids: example ==> Marcelo: id=1,  etc
names = ['0', 'AKN', 'ManGate']
# Initialize and start realtime video capture
cam = cv2.VideoCapture(0)
cam.set(3, 640)  # set video width
cam.set(4, 480)  # set video height
# Define min window size to be recognized as a face
minW = 0.1 * cam.get(3)
minH = 0.1 * cam.get(4)
xx = 0
while True:
    xx = xx + 1
    ret, img = cam.read()
Example #24
File: ex1.py Project: Matii96/ai-classes
 def Classify(self):
     detection = Classify(self.matrix)
     if detection is None:
         self.detection = '?'
     else:
         self.detection = str(detection)
Example #25
 def post(self):
     data = request.json
     cl = Classify(data)
     return cl.result
        :return:
        """
        self.analyzer.load_model('title')
        title_vector = self.analyzer.transform([title])
        self.analyzer.load_model('abstract')
        abstract_vector = self.analyzer.transform([abstract])
        self.analyzer.load_model('claims')
        claims_vector = self.analyzer.transform([claims])

        feature_vector = hstack([title_vector, abstract_vector])
        feature_vector = hstack([feature_vector, claims_vector])

        return feature_vector

    def predict(self, feature_vector):
        """
        Predict class based on feature vector input
        :param feature_vector:
        :return:
        """
        group = self.classify.predict(feature_vector)
        return group


if __name__ == '__main__':
    config_info = Config()
    f = Factory(config_info)
    file = '2015_2016_Patent_Data_new.csv'
    feature_matrix, response_vector = f.get_all_column_data(file)
    f.classify = Classify(config_info, feature_matrix, response_vector)
    f.full_train()
Example #27
#-*- coding:utf-8 -*-
from classify import Classify
import numpy as np

import sys
reload(sys)
sys.setdefaultencoding("utf8")

if __name__ == "__main__":

    X_train = np.array([
        u"我想听张学友的歌", u"周杰伦的龙卷风", u"鹿晗有什么歌好听", u"姚明打篮球好厉害", u"张继科会打乒乓球",
        u"詹姆士是体育明星"
    ])
    Y_train = np.array([1, 1, 1, 2, 2, 2])
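    # Labels: 1 marks the music-related sentences, 2 the sports-related ones.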
    Test_data = [u"我想听薛之谦的演员", "邓亚萍是体育明星", "刘翔是体育明星"]
    Model = Classify()
    Model.load_W2V_Model("word2vec.model")
    Model.train(X_train, Y_train)
    Model.predict(Test_data)

    Model.save_NBmodel("NB.model")
    del Model

    NBmodel_test = Classify()
    NBmodel_test.load_NBmodel("NB.model")
    NBmodel_test.predict(Test_data)
    del NBmodel_test
Example #28
from __future__ import division
import sys
import os
from classify import Classify
from db import Db

classifier = Classify()


def is_doctype_valid(doctype):
    return Db().get_words_count(doctype) > 0


def check_file(f):
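    # Classify the file line by line; lines the classifier rejects with a
    # ValueError are skipped.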
    results = []
    for line in open(f, 'r').readlines():
        try:
            classifier.set_text(line)
            results += [classifier.execute()]
        except ValueError:
            pass

    return results


def check_dir(d):
    results = []
    for f in os.listdir(d):
        if f.endswith(".js"):
            results += check_file(os.path.join(d, f))
Example #29
def get_wf():
    wf = pe.Workflow(name="svc_workflow")
    wf.base_dir = os.path.join(workingdir,"npairs_IQ_C1e-11")
    wf.config['execution']['crashdump_dir'] = wf.base_dir + "/crash_files"

    #INFOSOURCE ITERABLES
    subject_id_infosource = pe.Node(util.IdentityInterface(fields=['subject_id']), name="subject_id_infosource")
    subject_id_infosource.iterables = [('subject_id', subjects)]
    
    scan_id_infosource = pe.Node(util.IdentityInterface(fields=['scan_id']), name= 'scan_id_infosource')
    scan_id_infosource.iterables = ('scan_id', scans)

    preproc_id_infosource = pe.Node(util.IdentityInterface(fields=['preproc_id']), name="preproc_id_infosource")
    preproc_id_infosource.iterables = ('preproc_id', preprocs)

    #DATAGRABBER
    datagrabber = pe.Node(nio.DataGrabber(infields=['subject_id', 'scan_id','preproc_id'], outfields=['falff_files','dr_files','reho_files']), name='datagrabber')
    datagrabber.inputs.base_directory = '/'
    datagrabber.inputs.template = '*'
    datagrabber.inputs.field_template = dg_template
    datagrabber.inputs.template_args = dg_args
    datagrabber.inputs.sort_filelist = True

    wf.connect(subject_id_infosource, 'subject_id', datagrabber, 'subject_id')
    wf.connect(scan_id_infosource, 'scan_id', datagrabber, 'scan_id')
    wf.connect(preproc_id_infosource, 'preproc_id', datagrabber, 'preproc_id')
    
    #OUTPUT PATHS & LABELS
    toText = pe.JoinNode(Text_out(), joinsource='subject_id_infosource', joinfield="in_file", name="falff_text_files")
    wf.connect(datagrabber, 'falff_files', toText, 'in_file')

    
    toText2 = pe.JoinNode(Text_out(), joinsource='subject_id_infosource', joinfield="in_file", name="reho_text_files")
    wf.connect(datagrabber, 'reho_files', toText2, 'in_file')
    
    toText3 = pe.JoinNode(Text_out(), joinsource='subject_id_infosource', joinfield="in_file", name="dr_text_files")
    wf.connect(datagrabber, 'dr_files', toText3, 'in_file')
    
    #RUN CLASSIFIERs
    classifier = pe.Node(Classify(), name='SVC_falff')
    classifier.inputs.mask_file = mask_file
    wf.connect(toText, 'label_file', classifier, 'label_file')
    wf.connect(toText, 'data_paths', classifier, 'path_file')
    
    classifier2 = pe.Node(Classify(), name='SVC_reho')
    classifier2.inputs.mask_file = mask_file
    wf.connect(toText2, 'label_file', classifier2, 'label_file')
    wf.connect(toText2, 'data_paths', classifier2, 'path_file') 
    
    classifier3 = pe.Node(Classify(), name='SVC_dr')
    classifier3.inputs.mask_file = mask_file
    wf.connect(toText3, 'label_file', classifier3, 'label_file')
    wf.connect(toText3, 'data_paths', classifier3, 'path_file')
    #DATASINK
    ds = pe.Node(nio.DataSink(), name='datasink')
    ds.inputs.base_directory = outputdir
    
    wf.connect(classifier, 'pred', ds, 'prediction_accuracy_falff')
    wf.connect(classifier, 'rep', ds, 'reproducibility_falff')
    wf.connect(classifier, 'imgs', ds, "img_labels_falff")
    wf.connect(classifier, 'splits', ds, "splits_falff")
    wf.connect(classifier, 'sexs', ds, "sex_labels_falff")
    wf.connect(classifier, 'coefs', ds, "coefs_falff")
    wf.connect(classifier, 'datary', ds, "data_array_falff")
    
    wf.connect(classifier2, 'pred', ds, 'prediction_accuracy_reho')
    wf.connect(classifier2, 'rep', ds, 'reproducibility_reho')
    wf.connect(classifier2, 'imgs', ds, "img_labels_reho")
    wf.connect(classifier2, 'splits', ds, "splits_reho")
    wf.connect(classifier2, 'sexs', ds, "sex_labels_reho")
    wf.connect(classifier2, 'coefs', ds, "coefs_reho")
    wf.connect(classifier2, 'datary', ds, "data_array_reho")
    
    wf.connect(classifier3, 'pred', ds, 'prediction_accuracy_dr')
    wf.connect(classifier3, 'rep', ds, 'reproducibility_dr')   
    wf.connect(classifier3, 'imgs', ds, "img_labels_dr")
    wf.connect(classifier3, 'splits', ds, "splits_dr")
    wf.connect(classifier3, 'sexs', ds, "sex_labels_dr")
    wf.connect(classifier3, 'coefs', ds, "coefs_dr")
    wf.connect(classifier3, 'datary', ds, "data_array_dr")
    
    wf.config['execution'] = {
                               'plugin': 'Linear',
                               'stop_on_first_rerun': 'False',
                               'hash_method': 'timestamp'}
    return wf
class Factory(object):
    def __init__(self, config):
        self.config = config
        self.analyzer = Analyzer(self.config)
        self.classify = Classify(config)

    @staticmethod
    def get_all_column_data(file):
        """
        Combine all column data into a single feature matrix
        :param file:
        :return:
        """
        # Get all the feature matrices
        title_matrix, response_vector = f.analyze_column_data(file, 'title')
        abstract_matrix, response_vector = f.analyze_column_data(
            file, 'abstract')
        claims_matrix, response_vector = f.analyze_column_data(file, 'claims')

        # Get them all together
        feature_matrix = hstack([title_matrix, abstract_matrix])
        feature_matrix = hstack([feature_matrix, claims_matrix])
        return feature_matrix, response_vector

    def analyze_column_data(self, filename, column_name):
        """
        Create the feature model and matrix for the abstract column
        :param filename:
        :return:
        """
        self.analyzer.load_patent_data(filename)
        self.analyzer.extract_data(column_name)
        n_grams = 1
        self.analyzer.extract_features(n_grams, column_name)
        return self.analyzer.feature_matrix, self.analyzer.response

    def compute_heuristics(self, filename, column_name):
        """
        Figure out what words make up the groups in the shit
        :param filename:
        :return:
        """
        self.analyze_column_data(filename, column_name)
        self.analyzer.heuristics(column_name)

    def full_train(self):
        """
        GET THE CLASSIFIER TRAINED
        :return:
        """
        # self.classify.feature_selection()
        self.classify.classifier_selection()
        # self.classify.optimize_classifier()
        self.classify.train()
        self.classify.save_classifier()

    def evaluate(self, title, abstract, claims):
        """
        Predict group of a single entry
        :param abstract:
        :return:
        """
        self.analyzer.load_model('title')
        title_vector = self.analyzer.transform([title])
        self.analyzer.load_model('abstract')
        abstract_vector = self.analyzer.transform([abstract])
        self.analyzer.load_model('claims')
        claims_vector = self.analyzer.transform([claims])

        feature_vector = hstack([title_vector, abstract_vector])
        feature_vector = hstack([feature_vector, claims_vector])

        return feature_vector

    def predict(self, feature_vector):
        """
        Predict class based on feature vector input
        :param feature_vector:
        :return:
        """
        group = self.classify.predict(feature_vector)
        return group
Example #31
def action(q: str, a1: str = "", a2: str = "", a3: str = ""):
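    # Command dispatcher: `q` selects the action and `a1`..`a3` carry its
    # string arguments (login credentials, symbol ids, timeframe indexes,
    # Persian dates, depending on the action).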
    if q == "login":
        try:
            dt.config["mofid_login"] = int(a1)
        except ValueError:
            return "فقط عدد مجاز است!"  # "Only numbers are allowed!"
        dt.config["mofid_pass"] = a2
        dt.save_config()
        return str(dt.init_mofid())

    elif q == "classify":
        global classifier
        if classifier is not None and classifier.active: return "already"
        classifier = Classify()
        classifier.start()
        return "started"

    elif q == "reset":
        c = dt.cur(True)
        try:
            for rt in fn.required_tables.keys():
                c.execute("DROP TABLE IF EXISTS " + rt)
        except:
            return "aborted"
        dt.cur(False)
        return "done"

    elif q == "check":
        c = dt.cur(True)
        c.execute("SELECT auto FROM symbol WHERE id='" + a1 + "' LIMIT 1")
        try:
            stat = c.fetchone()[0]  # int
        except IndexError:
            return "not found"
        binary = fn.auto_to_binary(stat)
        if a2 == "-1":
            binary = "".join([a3 for _ in range(len(dt.config["timeframes"]))])
        else:
            binary = list(binary)
            binary[int(a2)] = a3
            binary = "".join(binary)
        c.execute("UPDATE symbol SET auto = '" + str(int(binary, 2)) + "' WHERE id='" + a1 + "'")
        dt.connect.commit()
        dt.cur(False)
        return binary

    elif q == "analyze":
        ret = fn.persian_board(a3)
        if ret is None: return "invalid date"
        a = ret[0]
        b = ret[1]
        Analyzer.put_temp(a1, int(a2), a, b)
        return '<img src="./html/img/indicator_1.png" class="indicator">'

    elif q == "delete":
        a = b = tfr = None
        if a2 != "":
            tfr = dt.config["timeframes"][int(a2)]["value"]
        if a3 != "":
            ret = fn.persian_board(a3)
            if ret is None: return "invalid date"
            a = ret[0]
            b = ret[1]
        Analyzer.put_temp(a1, tfr, a, b, "delete")
        return "saved"

    elif q == "update_all":
        global updating
        if updating: return "already"
        updating = True
        c = dt.cur(True)
        c.execute("SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%';")
        tbs = fn.tables(c)
        dt.cur(False)
        for tb in tbs:
            if tb not in fn.required_tables:
                update_table(tb)
        updating = False
        return "saved"

    elif q == "update_symbol":
        c = dt.cur(True)
        c.execute("SELECT name FROM sqlite_master WHERE type ='table' AND name NOT LIKE 'sqlite_%';")
        tbs = fn.tables(c)
        dt.cur(False)
        for tb in tbs:
            if tb.startswith("s" + str(a1) + "_"):
                update_table(tb)
        return "saved"

    elif q == "update_table":
        since = None
        if a3 != "":
            try:
                since = fn.persian_date(a3)
            except:
                return "invalid date"
        tb = "s" + str(a1) + "_" + dt.config["timeframes"][int(a2)]["name"].lower()
        update_table(tb, since)
        return "saved"

    elif q == "change_timeframe":
        which = -1
        for tfr in range(len(dt.config["timeframes"])):
            if dt.config["timeframes"][tfr]["name"] == a1:
                which = tfr
        if which != -1:
            dt.config["timeframes"].pop(which)
        else:
            for ctf in fn.all_timeframes:
                if ctf["name"] == a1:
                    dt.config["timeframes"].append(ctf)
            dt.config["timeframes"] = sorted(dt.config["timeframes"], key=lambda i: i['value'])
        dt.save_config()
        return str(not (which != -1))

    elif q == "shutdown":
        mt5.shutdown()
        dt.connect.close()
        os.kill(os.getpid(), signal.SIGTERM)

    else:
        return 500
Example #32
from imageprocessing.image_processor_predict import ImageProcessorPredict
from classify import Classify
from utils.app_constants import AppConstants
import math

if __name__ == '__main__':
    image_size = 224
    channel = 3
    label_size = 2

    # Predict
    classify = Classify(image_size, label_size, channel, AppConstants.MODEL_PATH_NAME)
    classify.load()
    image_predict = ImageProcessorPredict(image_size)
    image_arr = image_predict.get_image(AppConstants.ROOT_MODEL + 'car/car1.jpg')

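    # predict() returns per-class scores for the single image; the weighted sum
    # below (weights exp(i + 4)) collapses them into one integer estimate.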
    probs = classify.predict(image_arr)[0]
    cnt = int(sum([math.exp(i + 4) * probs[i] for i in range(len(probs))]))
    probs = [(i, round(100 * p, 1)) for i, p in enumerate(probs)]
    print probs
 def __init__(self, config):
     self.config = config
     self.analyzer = Analyzer(self.config)
     self.classify = Classify(config)