def class_(namespace):
    """Create classifier class and wait for markers from present.py."""
    # Redirect this worker's stdout to a per-PID log file before entering
    # the main loop, so the classifier's output is captured.
    sys.stdout = open(str(os.getpid()) + ".out", "w")
    CLSF = classify.Classifier(namespace=namespace,
                               mapnames=mapnames,
                               online=True,
                               top_exp_length=top_exp_length,
                               classifier_channels=classifier_channels,
                               saved_classifier=savedclass)
    CLSF.mainloop()
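# Hypothetical usage sketch (not from the source): class_() looks like the
# target of a worker process that shares state with present.py through a
# multiprocessing namespace. Something along these lines:
import multiprocessing as mp

if __name__ == '__main__':
    manager = mp.Manager()
    namespace = manager.Namespace()  # shared attribute store (assumed)
    worker = mp.Process(target=class_, args=(namespace,))
    worker.start()
    worker.join()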
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

import classify


def test_prediction(capsys, min_f1=0.89, min_accuracy=0.97):
    # get texts and labels from the training data
    train_examples = classify.read_smsspam("smsspam/SMSSpamCollection.train")
    train_labels, train_texts = zip(*train_examples)

    # get texts and labels from the development data
    devel_examples = classify.read_smsspam("smsspam/SMSSpamCollection.devel")
    devel_labels, devel_texts = zip(*devel_examples)

    # create the feature extractor and label encoder
    to_features = classify.TextToFeatures(train_texts)
    to_labels = classify.TextToLabels(train_labels)

    # train the classifier on the training data
    classifier = classify.Classifier()
    classifier.train(to_features(train_texts), to_labels(train_labels))

    # make predictions on the development data
    predicted_indices = classifier.predict(to_features(devel_texts))
    assert np.array_equal(predicted_indices, predicted_indices.astype(bool))

    # measure performance of predictions
    devel_indices = to_labels(devel_labels)
    spam_label = to_labels.index("spam")
    f1 = f1_score(devel_indices, predicted_indices, pos_label=spam_label)
    accuracy = accuracy_score(devel_indices, predicted_indices)

    # print out performance
    if capsys is not None:
        with capsys.disabled():
            msg = "\n{:.1%} F1 and {:.1%} accuracy on SMSSpam development data"
            print(msg.format(f1, accuracy))

    # make sure that performance is adequate
    assert f1 > min_f1
    assert accuracy > min_accuracy
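# The test above relies on only four names from the classify module. A
# minimal sketch of that interface, assuming scikit-learn underneath and a
# tab-separated data file (the real implementation may differ):
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder


def read_smsspam(path):
    """Yield (label, text) pairs from a "<label>\\t<text>" file."""
    with open(path, encoding="utf-8") as lines:
        for line in lines:
            label, _, text = line.partition("\t")
            yield label, text.rstrip("\n")


class TextToFeatures:
    def __init__(self, texts):
        self.vectorizer = CountVectorizer()
        self.vectorizer.fit(texts)

    def __call__(self, texts):
        return self.vectorizer.transform(texts)


class TextToLabels:
    def __init__(self, labels):
        self.encoder = LabelEncoder()
        self.encoder.fit(labels)

    def __call__(self, labels):
        return self.encoder.transform(labels)

    def index(self, label):
        """Return the integer index the encoder assigned to `label`."""
        return list(self.encoder.classes_).index(label)


class Classifier:
    def __init__(self):
        self.model = LogisticRegression(max_iter=1000)

    def train(self, features, labels):
        self.model.fit(features, labels)

    def predict(self, features):
        return self.model.predict(features)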
from flask import Flask, request
from flask_restful import Resource, Api
from flask_cors import CORS, cross_origin
import cv2, csv
import sys
import json, os, glob, io
import logging

sys.path.append('Gesture-Recognition-with-3DRESNET/')
import classify

app = Flask(__name__)
api = Api(app)
CORS(app)

classifier = classify.Classifier()
classifier.load()


@app.route("/")
def hello():
    return "Welcome to the Gesture Recognition API"


@app.route('/segment', methods=['POST'])
def segment():
    """Receives a segmentation request and replies with the result."""
    videoName = request.form['videoName']
    videoDuration = request.form['videoDuration']
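# A hedged client-side example for the /segment endpoint above. The form
# field names come from the handler; the handler body is truncated in the
# source, so the reply format and the values here are placeholders.
import requests

resp = requests.post('http://localhost:5000/segment',
                     data={'videoName': 'demo.mp4', 'videoDuration': '12.5'})
print(resp.status_code, resp.text)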
def classify_upload():
    """
    Predict the content of an image uploaded from the local computer and
    show the image detection template.
    """
    model = request.form['model']
    solvermode = request.form['solvermode']
    try:
        # We will save the file to disk for possible data collection.
        imagefile = request.files['imagefile']
        filename_ = str(datetime.datetime.now()).replace(' ', '_') + \
            werkzeug.secure_filename(imagefile.filename)
        filename = os.path.join(UPLOAD_FOLDER, filename_)
        imagefile.save(filename)
        logging.info('Saving to %s.', filename)
        image = exifutil.open_oriented_im(filename)
    except Exception as err:
        logging.info('Uploaded image open error: %s', err)
        return flask.render_template(
            'image_detection.html', has_result=True,
            result=(False, 'Cannot open uploaded image.'))

    caffe_root = os.environ['CAFFE_ROOT']
    # (prototxt, caffemodel, mean file, labels file, grayscale) per model
    model_args = {
        "cifar10quick": (
            caffe_root + '/examples/cifar10/cifar10_quick.prototxt',
            caffe_root + '/examples/cifar10/cifar10_quick_iter_5000.caffemodel',
            caffe_root + '/examples/cifar10/mean.npy',
            caffe_root + '/data/cifar10/batches.meta.txt',
            False),
        "cifar10full": (
            caffe_root + '/examples/cifar10/cifar10_full.prototxt',
            caffe_root + '/examples/cifar10/cifar10_full_iter_70000.caffemodel',
            caffe_root + '/examples/cifar10/mean.npy',
            caffe_root + '/data/cifar10/batches.meta.txt',
            False),
        "mnist": (
            caffe_root + '/examples/mnist/lenet.prototxt',
            caffe_root + '/examples/mnist/lenet_iter_10000.caffemodel',
            None,
            None,
            True),
    }
    prototxt, caffemodel, mean_file, labels_file, grayscale = model_args[model]

    convert_prototxt.PrototxtConverter(model, solvermode)
    starttime = time.time()
    classifier = classify.Classifier(solvermode)
    result = classifier.classify_image(prototxt, caffemodel, mean_file,
                                       filename, labels_file, grayscale)
    totaltime = round(time.time() - starttime, 4)

    return flask.render_template('image_detection.html',
                                 has_result=True, result=result,
                                 imagesrc=embed_image_html(image),
                                 totaltime=totaltime)
def classify_url():
    """
    Predict the content of an image fetched from a URL and show the image
    detection template.
    """
    imageurl = request.form['imageurl']
    model = request.form['model']
    solvermode = request.form['solvermode']
    try:
        filename = wget.download(imageurl, out=UPLOAD_FOLDER)
    except Exception as err:
        # For any exception we encounter in reading the image, we will just
        # not continue.
        logging.info('URL Image download error: %s', err)
        return flask.render_template(
            'image_detection.html', has_result=True,
            result=(False, 'Cannot download image from URL.'))

    caffe_root = os.environ['CAFFE_ROOT']
    # (prototxt, caffemodel, mean file, labels file, grayscale) per model
    model_args = {
        "cifar10quick": (
            caffe_root + '/examples/cifar10/cifar10_quick.prototxt',
            caffe_root + '/examples/cifar10/cifar10_quick_iter_5000.caffemodel',
            caffe_root + '/examples/cifar10/mean.npy',
            caffe_root + '/data/cifar10/batches.meta.txt',
            False),
        "cifar10full": (
            caffe_root + '/examples/cifar10/cifar10_full.prototxt',
            caffe_root + '/examples/cifar10/cifar10_full_iter_70000.caffemodel',
            caffe_root + '/examples/cifar10/mean.npy',
            caffe_root + '/data/cifar10/batches.meta.txt',
            False),
        "mnist": (
            caffe_root + '/examples/mnist/lenet.prototxt',
            caffe_root + '/examples/mnist/lenet_iter_10000.caffemodel',
            None,
            None,
            True),
    }
    prototxt, caffemodel, mean_file, labels_file, grayscale = model_args[model]

    convert_prototxt.PrototxtConverter(model, solvermode)
    starttime = time.time()
    classifier = classify.Classifier(solvermode)
    result = classifier.classify_image(prototxt, caffemodel, mean_file,
                                       filename, labels_file, grayscale)
    totaltime = round(time.time() - starttime, 4)

    return flask.render_template('image_detection.html',
                                 has_result=True, result=result,
                                 imagesrc=imageurl,
                                 totaltime=totaltime)
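# Design sketch (not in the source): classify_upload() and classify_url()
# share the same model table and timing logic, so once an image file is on
# disk both could delegate to a single helper. MODEL_ARGS stands for the
# per-model tuple table above, hoisted to module level.
def run_classification(model, solvermode, filename):
    """Convert the prototxt, run the Caffe classifier, and time the call."""
    prototxt, caffemodel, mean_file, labels_file, grayscale = MODEL_ARGS[model]
    convert_prototxt.PrototxtConverter(model, solvermode)
    starttime = time.time()
    classifier = classify.Classifier(solvermode)
    result = classifier.classify_image(prototxt, caffemodel, mean_file,
                                       filename, labels_file, grayscale)
    return result, round(time.time() - starttime, 4)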
def work_flow():
    """The work flow of blending several TC OSW."""
    load_configs.setup_logging()
    logger = logging.getLogger(__name__)

    # CONFIG
    try:
        CONFIG = load_configs.load_config()
    except Exception as msg:
        logger.exception(f'Exception occurred when loading config: {msg}')
        # CONFIG is unusable if loading failed, so stop here.
        sys.exit(1)
    os.makedirs(CONFIG['logging']['dir'], exist_ok=True)

    # read commandline arguments: first the full command line,
    full_cmd_arguments = sys.argv
    # then everything after the script name
    argument_list = full_cmd_arguments[1:]
    try:
        arguments, values = getopt.getopt(argument_list, '', gnuOptions)
    except getopt.error as err:
        # output error, and return with an error code
        print(str(err))
        sys.exit(2)

    input_custom_period = False
    input_custom_region = False
    specify_basin = False
    basin = None
    do_match_smap = False
    do_regression = False
    reg_instructions = None
    smogn_target = None
    interval = None
    do_simulate = False
    do_classify = False
    classify_instructions = None
    tag = None
    do_compare = False
    draw_sfmr = False
    max_windspd = None
    force_align_smap = False
    do_sfmr = False
    sfmr_instructions = None
    do_ibtracs = False
    ibtracs_instructions = None
    do_validation = False
    do_check = False
    do_sta_ibtracs = False
    do_sta_era5_smap = False
    do_smart_compare = False
    do_merra2 = False
    do_match_sfmr = False
    do_combine = False

    # evaluate given options (single long options are written as one-element
    # tuples: `x in ('--flag')` would be a substring test on a plain string)
    for current_argument, current_value in arguments:
        if current_argument in ('-p', '--period'):
            input_custom_period = True
            period_parts = current_value.split(',')
            if len(period_parts) != 2:
                logger.error('Inputted period is wrong: need 2 parameters')
        elif current_argument in ('-r', '--region'):
            input_custom_region = True
            region_parts = current_value.split(',')
            if len(region_parts) != 4:
                logger.error('Inputted region is wrong: need 4 parameters')
        elif current_argument in ('-b', '--basin'):
            specify_basin = True
            basin_parts = current_value.split(',')
            if len(basin_parts) != 1:
                logger.error('Inputted basin is wrong: need 1 parameter')
            basin = basin_parts[0]
        elif current_argument in ('-e', '--match_smap'):
            do_match_smap = True
        elif current_argument in ('-g', '--reg'):
            do_regression = True
            reg_instructions = current_value.split(',')
        elif current_argument in ('--smogn_target',):
            smogn_target = current_value.split(',')[0]
        elif current_argument in ('--interval',):
            interval = current_value.split(',')[:2]
        elif current_argument in ('--simulate',):
            do_simulate = True
            simulate_instructions = current_value.split(',')
        elif current_argument in ('--classify',):
            do_classify = True
            classify_instructions = current_value.split(',')
        elif current_argument in ('--tag',):
            tag = current_value.split(',')[0]
        elif current_argument in ('-c', '--compare'):
            do_compare = True
            compare_instructions = current_value.split(',')
        elif current_argument in ('--draw_sfmr',):
            head = current_value.split(',')[0]
            if head == 'True':
                draw_sfmr = True
            elif head == 'False':
                draw_sfmr = False
            else:
                logger.error('draw_sfmr must be "True" or "False"')
                sys.exit(1)
        elif current_argument in ('--max_windspd',):
            head = current_value.split(',')[0]
            max_windspd = float(head)
        elif current_argument in ('--force_align_smap',):
            head = current_value.split(',')[0]
            if head == 'True':
                force_align_smap = True
            elif head == 'False':
                force_align_smap = False
            else:
                logger.error('force_align_smap must be "True" or "False"')
                sys.exit(1)
        elif current_argument in ('-s', '--sfmr'):
            do_sfmr = True
        elif current_argument in ('-i', '--ibtracs'):
            do_ibtracs = True
        elif current_argument in ('-v', '--validate'):
            do_validation = True
            validate_instructions = current_value
        elif current_argument in ('-k', '--check'):
            do_check = True
        elif current_argument in ('--sta_ibtracs',):
            do_sta_ibtracs = True
        elif current_argument in ('--sta_era5_smap',):
            do_sta_era5_smap = True
            sources = current_value.split(',')
        elif current_argument in ('--smart_compare',):
            do_smart_compare = True
        elif current_argument in ('--merra2',):
            do_merra2 = True
        elif current_argument in ('--match_sfmr',):
            do_match_sfmr = True
        elif current_argument in ('--combine',):
            do_combine = True

    if not specify_basin:
        logger.error('Must specify basin')
        sys.exit(1)

    if input_custom_period:
        # Period parts are formatted as yyyy-mm-dd-HH-MM-SS
        period = [
            datetime.datetime.strptime(period_parts[0], '%Y-%m-%d-%H-%M-%S'),
            datetime.datetime.strptime(period_parts[1], '%Y-%m-%d-%H-%M-%S')
        ]
    else:
        period = [
            datetime.datetime(2015, 4, 1, 0, 0, 0),
            datetime.datetime.now()
        ]
    train_test_split_dt = datetime.datetime(2019, 1, 1, 0, 0, 0)

    if input_custom_region:
        # Area parts
        region = [float(part) for part in region_parts]
    else:
        region = [-90, 90, 0, 360]

    logger.info(f'Period: {period}')
    logger.info(f'Region: {region}')
    # MySQL Server root password
    passwd = '399710'

    # Download and read
    try:
        if do_combine:
            combine_table.TableCombiner(CONFIG, period, region, basin,
                                        passwd)
        if do_match_sfmr:
            match_era5_sfmr.matchManager(CONFIG, period, region, basin,
                                         passwd, False)
        if do_merra2:
            merra2.MERRA2Manager(CONFIG, period, False)
        if do_smart_compare:
            smart_compare.SmartComparer(CONFIG, period, basin, passwd)
        if do_sta_era5_smap:
            sta_era5_smap.Statisticer(CONFIG, period, basin, sources,
                                      passwd)
        if do_sta_ibtracs:
            sta_ibtracs.Statisticer(CONFIG, period, basin, passwd)
        if do_check:
            checker.Checker(CONFIG)
        if do_validation:
            validate.ValidationManager(CONFIG, period, basin,
                                       validate_instructions)
        if do_match_smap:
            match_era5_smap.matchManager(CONFIG, period, region, basin,
                                         passwd, False, work=True)
        if do_classify:
            classify.Classifier(CONFIG, period, train_test_split_dt,
                                region, basin, passwd, False,
                                classify_instructions, smogn_target)
        if do_simulate:
            simulate.TCSimulator(CONFIG, period, region, basin, passwd,
                                 False, simulate_instructions)
        if do_regression:
            regression.Regression(CONFIG, period, train_test_split_dt,
                                  region, basin, passwd, False,
                                  reg_instructions, smogn_target, tag)
        if do_compare:
            compare_tc.TCComparer(CONFIG, period, region, basin, passwd,
                                  False, compare_instructions, draw_sfmr,
                                  max_windspd, force_align_smap)
        if do_ibtracs:
            ibtracs_ = ibtracs.IBTrACSManager(CONFIG, period, region,
                                              basin, passwd)
        if do_sfmr:
            sfmr_ = sfmr.SfmrManager(CONFIG, period, region, passwd)
    except Exception as msg:
        logger.exception('Exception occurred when downloading and reading')

    logger.info('SWFusion complete.')
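# Example invocation (hypothetical script name and values; the long option
# names come from the parser above, and gnuOptions itself is defined
# elsewhere in the module. Note the getopt() call registers no short
# options, so the long forms must be used):
#
#   python workflow.py --basin=NA \
#       --period=2017-06-01-00-00-00,2017-10-01-00-00-00 \
#       --match_smap --compare=era5,smap --max_windspd=50.0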
def main_func(pprocess=False, analyze=False, meta=False, classification=False,
              func='summary_stat', type='#anwps_freq', sex=False, age=False):
    # Do pre_processing task
    if pprocess:
        # Read data_set with limited columns
        cupid_df = pd.read_csv('../data/raw/profiles.csv',
                               usecols=[
                                   'education', 'essay0', 'essay1', 'essay2',
                                   'essay3', 'essay7', 'essay8', 'essay9',
                                   'age', 'sex'
                               ])

        # Define an object of pre_processing class
        cupid = pre_processing.PreProcess()
        cupid_df = cupid.missing_value(cupid_df)
        cupid_df = cupid.merge_essay(cupid_df)
        cupid_df = cupid.remove_tag(cupid_df)
        cupid_df = cupid.recode_edcuaction(cupid_df)
        cupid_df = cupid.count_words_sentences(cupid_df)
        cupid_df = cupid.text_cleaning(cupid_df)

        # Save pre_processed data_set on disk
        cupid_df.to_csv(r'../data/processed/preprocessed_cupid.csv',
                        index=None, header=True)

        # Final message
        print(colored('preprocessed_cupid.csv is written in data/processed '
                      'folder...', 'red'))

    # ************************************************************************
    # Do analyses task
    elif analyze:
        # Read pre_processed data_set with limited columns
        cupid_dfa = pd.read_csv('../data/processed/preprocessed_cupid.csv',
                                usecols=[
                                    'education', 'age', 'sex', 'text',
                                    'isced', 'isced2', '#words',
                                    '#sentences', '#anwps', 'clean_text'
                                ])

        # Define an object of analyse class
        a_cupid = analyse.Analyse()
        if func == 'summary_stat':
            summary_df = a_cupid.summary(cupid_dfa)
            summary_df.to_json(r'../results/figures/summary_statistics.json')
            summary_df.to_csv(r'../results/figures/summary_statistics.csv')
            print(colored('summary_statistics.csv is written in '
                          'results/figures folder...', 'magenta'))
        elif func == 'plot':
            a_cupid.plot_func(cupid_dfa, type, sex)

    # *************************************************************************
    # Calculate meta_data
    if meta:
        style = meta_data.Stylo()

        # Read data_set
        df_preprocessed = pd.read_csv(
            '../data/processed/preprocessed_cupid.csv',
            usecols=['age', 'sex', '#anwps', 'clean_text', 'text',
                     'isced', 'isced2'])
        df_preprocessed.dropna(subset=['text', 'isced'], inplace=True)

        # Print the progress for each stylometric feature
        print(colored('\nCalculating count_char:\n', 'green'))
        df_preprocessed['count_char'] = df_preprocessed.progress_apply(
            lambda x: style.count_char(x['text']), axis=1)

        print(colored('\nCalculating count_punct:\n', 'green'))
        df_preprocessed['count_punct'] = df_preprocessed.progress_apply(
            lambda x: style.count_punc(x['text']), axis=1)

        print(colored('\nCalculating count_word:\n', 'green'))
        df_preprocessed['count_word'] = df_preprocessed.progress_apply(
            lambda x: style.count_words(x['text']), axis=1)

        print(colored('\nCalculating avg_wordlength:\n', 'green'))
        df_preprocessed['avg_wordlength'] = round(
            df_preprocessed['count_char'] / df_preprocessed['count_word'], 2)

        print(colored('\nCalculating count_misspelled:\n', 'green'))
        df_preprocessed['count_misspelled'] = df_preprocessed.progress_apply(
            lambda x: style.count_spellerror(x['text']), axis=1)

        print(colored('\nCalculating word uniqueness:\n', 'green'))
        df_preprocessed['word_uniqueness'] = df_preprocessed.progress_apply(
            lambda x: style.uniqueness(x['text']), axis=1)

        # Save calculated meta_data on disk
        df_preprocessed.to_csv(r'../data/processed/stylo_cupid_test.csv',
                               index=None, header=True)

        # Final message
        print(colored('stylo_cupid_test.csv is written in data/processed '
                      'folder...', 'red'))

    # **************************************************************************
    # Run the classifier on the stylometric features
    if classification:
        cls = classify.Classifier()
        df_cls = pd.read_csv(r'../data/processed/stylo_cupid2.csv')
        cls.logistic_text_meta(df_cls)
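# Hypothetical driver (not in the source): the stages above are flag-driven,
# so a full run would chain them like this.
if __name__ == '__main__':
    main_func(pprocess=True)        # write preprocessed_cupid.csv
    main_func(analyze=True)         # summary statistics
    main_func(meta=True)            # add stylometric features
    main_func(classification=True)  # fit the text+meta logistic model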
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline

import classify


def test_prediction(capsys, min_f1=0.89, min_accuracy=0.97):
    # K-FOLD TEST
    full_examples = classify.read_smsspam("a_lil_more.out")
    full_labels, full_texts = zip(*full_examples)

    # only used by the commented-out Porter export below
    clf = MLPClassifier(max_iter=1000)
    # note: integer max_df=1 keeps only tokens that appear in at most
    # one document
    pipeline = Pipeline([
        ('vectorizer', CountVectorizer(binary=False, ngram_range=(1, 1),
                                       max_df=1)),
        ('classifier', AdaBoostClassifier())
    ])

    k_fold = KFold(n_splits=2)
    # for LinearRegression
    # full_labels = [0 if i == "no" else i for i in full_labels]
    # full_labels = [1 if i == "yes" else i for i in full_labels]
    scores = []
    for train_indices, test_indices in k_fold.split(np.array(full_texts)):
        train_text = np.array(full_texts)[train_indices]
        train_y = np.array(full_labels)[train_indices]
        test_text = np.array(full_texts)[test_indices]
        test_y = np.array(full_labels)[test_indices]

        pipeline.fit(train_text, train_y)
        score = pipeline.score(test_text, test_y)
        p = pipeline.predict(test_text)
        p2 = pipeline.predict_proba(test_text)
        scores.append(score)

        p_o = [j for j in p if j == "yes"]
        p2_o = [p2[j] for j in range(len(p)) if p[j] == "yes"]
        print("yes ", len(p_o), " total ", len(p), " proba ", p2_o[0])
    score = sum(scores) / len(scores)

    # K-FOLD performance
    if capsys is not None:
        with capsys.disabled():
            msg = "\n{:.1%} score on MTURK development data"
            print(msg.format(score))

    # f = open("classify.js", "w")
    # porter = Porter(clf, language='js')
    # output = porter.export(embed_data=True)
    # f.write(output)
    # f.close()

    # NORMAL VALIDATION
    # get texts and labels from the training data
    train_examples = classify.read_smsspam("AGBIG_annotation.outt")
    train_labels, train_texts = zip(*train_examples)

    # get texts and labels from the development data
    devel_examples = classify.read_smsspam("AGBIG_annotation.outd")
    devel_labels, devel_texts = zip(*devel_examples)

    # create the feature extractor and label encoder
    to_features = classify.TextToFeatures(train_texts)
    to_labels = classify.TextToLabels(train_labels)

    # train (fit) the classifier on the training data
    classifier = classify.Classifier()
    classifier.train(to_features(train_texts), to_labels(train_labels))

    # make predictions on the development data
    predicted_indices = classifier.predict(to_features(devel_texts))
    assert np.array_equal(predicted_indices, predicted_indices.astype(bool))

    # measure performance of predictions
    devel_indices = to_labels(devel_labels)
    spam_label = to_labels.index("yes")
    f1 = f1_score(devel_indices, predicted_indices, pos_label=spam_label)
    accuracy = accuracy_score(devel_indices, predicted_indices)

    # print out performance
    if capsys is not None:
        with capsys.disabled():
            msg = "\n{:.1%} F1 and {:.1%} accuracy on MTURK development data"
            print(msg.format(f1, accuracy))
import sys

sys.path.insert(0, '/extra/manojgopale/AES_data/')
import classify

dataDir = "/xdisk/manojgopale/data_csv/config5p4/"
trainData, devData, testData = classify.getData(dataDir, 15000)
x_train, y_train_oh = trainData
x_dev, y_dev_oh = devData
x_test, y_test_oh = testData

## Instantiate the model with the train, dev and test sets
resultDir = "/extra/manojgopale/AES_data/config5p4_15ktraining/result_new"
modelName = "m_newscript"
classifier = classify.Classifier(resultDir, modelName, x_train, y_train_oh,
                                 x_dev, y_dev_oh, x_test, y_test_oh)

## Train the model
classifier.train(2048)

## Evaluate
classifier.evaluate()

## Save the model
classifier.saveModel()