def main():
    global config
    USE_CLASSIFIER = 'lstm'
    weight_file_path = Config.getPath('models') + '/' + USE_CLASSIFIER + '-weights.h5'

    load_config(USE_CLASSIFIER)
    classifier = ClassifierFactory.getLSTM(**{'config': config})  # Classifier(model_name=USE_CLASSIFIER, config=config)
    classifier.load_weights(weight_file_path)

    df = pd.read_csv(Config.getPath('data') + '/' + TESTING_DATA)
    Xtest = df['question_text']
    Ytest = df['target']

    print('extract configuration from input texts ...')
    print('testing size: ', len(Xtest))
    print('start predicting ...')
    pred = classifier.predict(Xtest)
    print(pred)

    score = metrics.accuracy_score(Ytest, pred)
    print("accuracy: %0.3f" % score)

    cm = metrics.confusion_matrix(Ytest, pred, labels=[0, 1])
    plot_confusion_matrix(cm, classes=[0, 1])
def main():
    args = parse_arguments()
    hosts = get_hosts(args.automate)
    user = get_user(args.automate)
    tmp_dir = get_tmp_dir(args.automate)
    conf = Config(args.output_file, hosts, user, tmp_dir)

    if args.automate:
        rbdfio = RbdFio(True, conf)
        kvmrbdfio = KvmRbdFio(True, conf)
        radosbench = Radosbench(True, conf)
        conf.add_benchmark_settings(rbdfio.output)
        conf.add_benchmark_settings(kvmrbdfio.output)
        conf.add_benchmark_settings(radosbench.output)
    else:
        tests = select_tests()
        for test in tests:
            use_default = False
            print "\nEntering settings for %s:" % (test)
            while True:
                try:
                    default = raw_input("Would you like to use default"
                                        " settings for %s [y/n]? " % (test))
                except KeyboardInterrupt:
                    print "Aborting script. No data will be saved."
                    sys.exit(1)
                if default.lower() == "y":
                    print "Using default values for %s" % (test)
                    use_default = True
                    break
                elif default.lower() == "n":
                    use_default = False
                    break
            generate_test_values(test, use_default, conf)

    conf.save_file()
    print "Output saved to: %s" % (conf.out_file)
def __init__(self, ID, spreadsheet_path):
    self.ID = ID
    self.spreadsheet = spreadsheet_path
    self.tool_type = ""
    self.cal_date = ""
    self.cal_exp = ""
    self.log_path = ""
    self.cert_path = ""
    self.sheetrow = None      # row that contains the data for the ID of interest
    self.use = ""
    self.use_limit = None
    self.location = ""
    self.exp_type = ""
    self.status = 'GOOD FOR USE'
    self.legacyID_col = 1
    self.ID_col = 2           # column that contains IDs; sheet default as of 02/01/2021, can be set externally
    self.df = pd.read_csv(spreadsheet_path, encoding='UTF-8')  # big thanks to 6294 for selling me csv ;)
    self.config = Config()
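# Hedged usage sketch (assumes this __init__ belongs to the Tool class imported from
# azqa_instrument elsewhere in this repo; the ID and spreadsheet path are hypothetical):
tool = Tool('T-0042', 'masterlist.csv')
print(tool.status)   # 'GOOD FOR USE' until later calibration/usage checks change it
print(tool.ID_col)   # column index holding tool IDs; can be overridden externally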
def process_in_multiple_cfg(image,
                            cfg_ls=['Configs/config1.py',
                                    'Configs/config2.py',
                                    'Configs/config3.py'],
                            is_visualize=False):
    r"""Run the full process with multiple configs and average the results.

    Args:
        image: a single image (numpy array) to process
        cfg_ls: list of paths to config files, each loaded into a Config instance
        is_visualize: bool, whether or not to visualize the result
    """
    ls = []
    for cfg_path in cfg_ls:
        cfg = Config(cfg_path)
        res = process_in_single_cfg(image, cfg)
        if res is not None:
            ls.append(res)

    if len(ls) > 0:
        avg = int(sum(ls) / len(ls))
        if is_visualize:
            cv2.line(image, (avg, 0), (avg, image.shape[0]),
                     (255, 255, 0), thickness=4, lineType=8, shift=0)
            cv2.imshow('result', image)
            cv2.waitKey(0)
        return avg
    else:
        return None
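# Hedged usage sketch (the image path is hypothetical; assumes process_in_single_cfg
# returns an x-coordinate as an int, or None when a config fails, as handled above):
import cv2

img = cv2.imread('samples/wall_001.png')
x = process_in_multiple_cfg(img, is_visualize=True)
if x is not None:
    print('averaged detection x-coordinate:', x)
else:
    print('no config produced a detection')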
def load_config(model_name):
    print('loading saved configuration ...')
    global config
    config_file_path = Config.getPath('models') + '/' + model_name + '-config.joblib'
    config = ClassifierFactory.getConfig(joblib_file=config_file_path)
    # Two classes - Fake=0, Reliable=1
    config.set('num_target_tokens', 2)
def predict_svm():
    global config
    load_config('svm')

    print('loading data...')
    df = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)
    df2 = df.sample(50000)
    X = df2['question_text']
    Y = df2['target']
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

    # Two classes - Fake=0, Reliable=1
    config.set('num_target_tokens', 2)

    classifier = ClassifierFactory.getSVM()
    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))
    print('start fitting ...')
    classifier.fit(Xtrain, Ytrain, Xtest, Ytest)

    df = pd.read_csv(Config.getPath('data') + '/' + TESTING_DATA)
    df = df.sample(100000)
    X = df['question_text']
    Y = df['target']

    pred = classifier.predict(X)
    score = metrics.accuracy_score(Y, pred)
    f1score = metrics.f1_score(Y, pred)
    print("accuracy: %0.3f" % score)
    print("f1 score: %0.3f" % f1score)

    # compare against Y (the labels matching pred); the original passed Ytest,
    # whose length does not match pred
    cm = metrics.confusion_matrix(Y, pred, labels=[0, 1])
    plot_confusion_matrix(cm, classes=[0, 1])
def load_config(**kwargs):
    print('loading csv file ...')
    global config
    df = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)
    df = df.sample(50000)
    X = df['question_text']
    Y = df['target']
    print('preparing configuration...')
    config = ClassifierFactory.getConfig(X, Y, json_file=None, **kwargs)
    # Two classes - Fake=0, Reliable=1
    config.set('num_target_tokens', 2)
def getConfig(X=None, Y=None, json_file=None, joblib_file=None, **kwargs):
    """
    :param X: text data to be classified
    :param Y: true labels for training
    :param json_file: JSON file to load the Config from
    :param joblib_file: joblib file to load the Config from
    :param kwargs: remaining config params:
        max_input_seq_length
        max_vocab_size
        num_target_tokens
        embedding
        embedding_size
    :return: the shared Config instance
    """
    if ClassifierFactory.__config is None and joblib_file is None and json_file is None:
        ClassifierFactory.__config = Config(Txt_data=X, Txt_labels=Y, **kwargs)
    else:
        if joblib_file is not None:
            ClassifierFactory.__config = Config.from_joblib(joblib_file)
        else:
            ClassifierFactory.__config = Config.from_json(json_file)
    return ClassifierFactory.__config
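# Hedged usage sketch (parameter names follow the docstring above; the joblib path
# is hypothetical):
# build a shared Config directly from text data and labels ...
config = ClassifierFactory.getConfig(X, Y,
                                     max_input_seq_length=50,
                                     max_vocab_size=5000,
                                     embedding='glove')
# ... or restore one that was saved earlier
config = ClassifierFactory.getConfig(joblib_file='models/lstm-config.joblib')
config.set('num_target_tokens', 2)   # Two classes - Fake=0, Reliable=1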
def train_vanilla(classifier):
    global config
    print('configuration extracted from input texts ...')
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(config.getData(), config.getLabels(),
                                                    test_size=0.2, random_state=42)
    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))
    print('start fitting ...')
    history = classifier.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=config.get('epochs'))
    if history is not None:
        history_plot_file_path = Config.getPath('reports') + '/' + classifier.model_name + '-history.png'
        plot_and_save_history(history, classifier.model_name, history_plot_file_path)
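# Hedged usage sketch (assumes load_config(**kwargs) and the global `config` defined
# in the snippets above; kwarg names follow the getConfig docstring and the epoch
# count is illustrative):
load_config(max_input_seq_length=50, max_vocab_size=5000)
config.set('epochs', 10)   # train_vanilla reads config.get('epochs')
classifier = ClassifierFactory.getLSTM(**{'config': config})
train_vanilla(classifier)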
import logging
import logging.handlers
from time import gmtime, strftime

from config_class import Config

config = Config()
log_path = config.system['log_path']


class FileLogger:
    """Logger class for logging to a file with rotation."""

    LOG_FILENAME = log_path
    logger = logging.getLogger(__name__)

    def __init__(self, log_to_console=False):
        self.log_to_console = log_to_console
        self.log_format = '%(asctime)s [%(threadName)-12.12s] %(levelname)s - %(message)s'

        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # create a rotating file handler
        handler = logging.handlers.RotatingFileHandler(self.LOG_FILENAME,
                                                       maxBytes=1000000,
                                                       backupCount=10)
        handler.setLevel(logging.INFO)

        # create a logging format
        formatter = logging.Formatter(self.log_format)
        handler.setFormatter(formatter)
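# Hedged usage sketch. As excerpted, __init__ builds the RotatingFileHandler but the
# snippet ends before the handler is attached (and before log_to_console is used);
# presumably the original module continues with logger.addHandler(handler).
# With the handler attached, usage would look like:
file_logger = FileLogger(log_to_console=True)
file_logger.logger.info('log file path: %s', log_path)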
from __future__ import absolute_import  # updated importing tools for Python 3.x
import sys
import os
import datetime

import tkinter
from tkinter import messagebox
from tkinter import ttk
from tkinter import PhotoImage
from PIL import ImageTk, Image

from azqa_instrument import Tool  # import the Tool helper class
from read_log import Log
from write_temp import Temp
from config_class import Config
from date_check import Date

config_file = Config()

# TODO: Add filepath for spreadsheet
spread_path = config_file.masterlist


def main():
    # Interactive prompt; the command-line-argument version is kept below, commented out.
    tool_input = input('Input tool ID')
    # tool_input = sys.argv[1]  # grab the command line argument (tool ID) and assign it as tool_input
    # webbrowser.open_new(spread_path)
r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_data\validation_data\validation_indices.pickle" ) val_labels = load_obj( r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_data\validation_data\validation_labels.pickle" ) test_indices = load_obj( r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_data\test_data\test_indices.pickle" ) test_labels = load_obj( r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_data\test_data\test_labels.pickle" ) #=====The following line specifies the fixed parameters of the model===== config = Config(200, 3, 1694, 659, 732, 500, 1e-4, 35, 1, embedding_matrix) #n_features, n_classes, batch, val_batch, test_batch, n_epochs, lr, max_l, n_layers, embeddings #Uae hyperopt to find the best hyperparameters hyp_dir = r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_logs\hyperopt\bd_1l_hyp_trials.pickle" #best, trials = hyperopt_wrapper_nn(train_indices, train_labels, val_indices, val_labels, config, hyp_dir, True) ''' Use random search to find the best hyperparameters random_search_log = r"D:\Data Science\Projects\twitter-airline-sentiment\training_files\training_logs\random_search\two_layer_random_search_log.csv" randomized_search(train_indices, train_labels, val_indices, val_labels, config, random_search_log) ''' #=====Evaluating model performance on test set===== ''' vals': {'n_dropout_1': [0.12294832533717485], 'n_hidden_units_1': [495.0],
# Use this to easily run the code in different directories/devices
folder['initial'] = 'C:/Users/jimar/Dimitris/python/'

# The path where the repository is stored
folder['main'] = folder['initial'] + 'crack_detection_CNN_masonry/'

# if folder['main'] == '', then the current working directory will be used
if folder['main'] == '':
    folder['main'] = os.getcwd()

import sys
sys.path.append(folder['main'])

from config_class import Config

cnf = Config(folder['main'])
args = cnf.set_repository()

# Set some parameters
IMAGE_DIMS = cnf.IMAGE_DIMS

# import the necessary packages
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from imutils import paths
import numpy as np
import progressbar
import cv2

from subroutines.HDF5 import HDF5DatasetWriterMask
def train_experiment(classifier):
    print('loading csv file ...')
    global config
    df = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)
    df = df.sample(20000)
    X = df['question_text']
    Y = df['target']

    print('splitting data...')
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)
    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))
    print('start fitting ...')

    # max_sequence, vocab_size, lstm_units, dropout
    experiment = [
        [20, 5000, 64, 0.2],    # 0
        [35, 5000, 64, 0.2],    # 1
        [50, 5000, 64, 0.2],    # 2
        [100, 5000, 64, 0.2],   # 3
        [50, 5000, 128, 0.2],   # 4
        [50, 5000, 256, 0.2],   # 5
        [50, 5000, 512, 0.2],   # 6
        [50, 2000, 64, 0.2],    # 7
        [50, 3000, 64, 0.2],    # 8
        [50, 4000, 64, 0.2],    # 9
        [50, 5000, 64, 0.2],    # 10
        [50, 6000, 64, 0.2],    # 11
        [50, 7000, 64, 0.2],    # 12
        [50, 8000, 64, 0.2],    # 13
        [50, 9000, 64, 0.2],    # 14
        [50, 5000, 64, 0.1],    # 15
        [50, 5000, 64, 0.2],    # 16
        [50, 5000, 64, 0.3],    # 17
        [50, 5000, 64, 0.4],    # 18
    ]

    i = 0
    for max_seq, vocab_siz, lstm_u, drop in experiment:
        config = Config(X, Y, max_seq, vocab_siz, 2, 'glove')
        print("%s starting experiment ... %d" % (datetime.datetime.now(), i))
        model = ClassifierFactory.getLSTM(**{
            'config': config,
            'lstm_units': lstm_u,
            'dropout': drop
        })
        history = model.fit(Xtrain, Ytrain, Xtest, Ytest, epochs=10,
                            file_prefix='experiment-%i' % i)
        history_plot_file_path = Config.getPath('reports') + '/' + model.model_name + \
            ('_experiment_%d' % i) + '-history.png'
        plot_and_save_history(history, model.model_name, history_plot_file_path)
        i += 1
# Use this to easily run the code in different directories/devices
folder['initial'] = 'C:/Users/jimar/Dimitris/python/'

# The path where the repository is stored
folder['main'] = folder['initial'] + 'crack_detection_CNN_masonry/'

# if folder['main'] == '', then the current working directory will be used
if folder['main'] == '':
    folder['main'] = os.getcwd()

import sys
sys.path.append(folder['main'])

from config_class import Config

cnf = Config(folder['main'])
args = cnf.set_repository()

# Set some parameters
IMAGE_DIMS = cnf.IMAGE_DIMS
BS = cnf.BS
epochs = cnf.epochs
INIT_LR = cnf.INIT_LR
N_FILTERS = cnf.N_FILTERS
info = cnf.info
mode = cnf.mode

# When using DeepCrack, eager execution needs to be enabled
if args["model"] == 'DeepCrack':
    import tensorflow as tf
    tf.enable_eager_execution()