def classify_new_text(text): """ Given a text it returns all the metrics """ name_classifier = "Classifier" timer = u.Timer() print "\nStarting to classify text with", len(text), "characters" from process_text import process_text_from_string clf = get_classifier(name_classifier) x_pred = process_text_from_string(text) #check if the text was processed correctly if x_pred is not None: y_pred = clf.predict([x_pred]) tag = str(y_pred[0]) #if its not correctly processed, return tag Z else: tag = "Z" print "\nText processed in", timer.get_time() print "It is difficulty", tag return tag
def __call__(self):
    """
    Time one call of self.f(self.current_value) and adapt the next value.

    The first timed call only records its duration in self.last_duration.
    On later calls the duration is compared with self.last_duration:
    a faster call raises self.exponent (and self.backoff) by
    1 / (10 + backoff); a call that is not faster halves self.backoff and
    lowers self.exponent by the halved backoff. Finally
    self.current_value is recomputed as initial_value ** exponent.
    """
    stopwatch = utilities.Timer()
    with stopwatch:
        self.f(self.current_value)
    elapsed = stopwatch.duration_in_seconds()

    if not self.last_duration:
        # Nothing to compare against yet: remember this duration and stop.
        self.last_duration = elapsed
        return

    # NOTE(review): self.last_duration is not refreshed below, so every
    # later call is compared with the first recorded duration -- confirm
    # that this is intended.
    sys.stdout.write(
        "function call duration for %s is %.02f while the last duration was %.02f exponent=%.02f backoff=%.02f\n"
        % (self.f.__name__, elapsed, self.last_duration, self.exponent,
           self.backoff))

    got_faster = elapsed < self.last_duration
    if got_faster:
        # Both attributes grow by the same step, computed from the
        # backoff value as it was before this update.
        step = 1 / (10 + self.backoff)
        self.exponent += step
        self.backoff += step
    else:
        # Not faster: halve the backoff, then pull the exponent back by it.
        self.backoff -= self.backoff / 2
        self.exponent -= self.backoff

    self.current_value = self.initial_value ** self.exponent
def full_ML_process(calculate=False, plot=False): """ It allows to do all the processes. It will process the texts (if asked), train a classifier and test it. It will also plot all the possible combinations of the input variables of the ML part (if asked) Args: calculate: if true, it will force to process all the data and recalculate X and Y plot: if true, it will save all the possible scaterplots of the input varaibles (X) """ #name_actual_classifier = name_adaBoost name_actual_classifier = name_SVM #name_actual_classifier = name_naive_bayes #name_actual_classifier = name_decision_tree #name_actual_classifier = name_random_forest timer = u.Timer() test_a_classifier(name_actual_classifier, calculate=calculate) #classify_new_text(name_actual_classifier, "Me gusta saltar piedras") import data_analisis if plot: data_analisis.plot_all() data_analisis.get_correlation_matrix() print "\nAll the processes done in", timer.get_time()
def compute(self):
    """ Takes ReLUNet and casts weights to a temp file so we can run the
        Matlab/Mosek SDP solver on these.

        The weights are written to a randomly named .mat file under the
        'saved_weights' directory next to this module, a MATLAB engine is
        started, solve_LipSDP is called on that file, and the result is
        stored in self.value (the elapsed time in self.compute_time).

        The temporary file is removed and the MATLAB engine is shut down
        even when saving or solving raises.

    RETURNS:
        whatever eng.solve_LipSDP returns (also stored in self.value)
    """
    timer = utils.Timer()

    # Collect weights and put them in a temp file
    weights = self.extract_weights(self.network, self.c_vector)
    weight_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'saved_weights')
    # savemat does not create missing directories
    os.makedirs(weight_dir, exist_ok=True)
    weight_path = os.path.join(weight_dir, secrets.token_hex(24) + '.mat')

    eng = None
    try:
        savemat(weight_path, weights)

        # Build matlab stuff
        eng = matlab.engine.start_matlab()
        for path in self.MATLAB_PATHS:
            eng.addpath(os.path.join(self.LIPSDP_DIR, path))
        eng.addpath(weight_dir)

        network = {
            'alpha': matlab.double([[0.]]),
            'beta': matlab.double([[1.]]),
            'weight_path': [weight_path],
        }
        lip_params = self.DEFAULT_LIPSDP_KWARGS
        L = eng.solve_LipSDP(network, lip_params, nargout=1)
        # Stop the clock before cleanup so teardown is not counted
        self.compute_time = timer.stop()
    finally:
        # Cleanup must run on every path: first the engine, then the file
        try:
            if eng is not None:
                eng.quit()
        finally:
            if os.path.exists(weight_path):
                os.remove(weight_path)

    self.value = L
    return L
def __init__(self, name_tagger, corpus, mwe=True): """ When initialized it will load all the taggers. They are: * UnigramTagger * BigramTagger * TrigramTagger If not possible it will create them, and save them. If Multi-Word Expressions are not allowed its necessary to split them and then use a UnigramTagger to be trained Args: name_tagger: root part of the name of the tagger, like cess_esp corpus: corpus that will train the tagger mwe: It can allow Multi-Word Expressions """ self.mwe = mwe if not mwe: name_tagger += '_' + NOMWE_TEXT #set the names of the taggers like: # cess_es_unigram.tagger, cess_es_bigram.tagger # or cess_es_nomwe_unigram.tagger, cess_es_nomwe_bigram.tagger complete_names = [name_tagger + '_' + x for x in N_GRAM_NAMES] # Try to load the taggers. try: for x in complete_names: utilities.load_pickle(x, TAGGER_EXTENSION, TAGGER_PATH).tag(['hola']) #If it not work create them except IOError: print "\n*** First-time use of", name_tagger, " taggers ***" print "Training taggers ..." timer = utilities.Timer() if self.mwe: cess_sents = corpus.tagged_sents() train_tagger(name_tagger, cess_sents) else: #Without mutliwords we need to split them cess_sents = unchunk(corpus.tagged_sents()) #We need the mwe tagger to train aux_tagger = tagger(name_tagger, corpus, mwe=True) tagged_cess_nomwe = aux_tagger.uni.tag_sents(cess_sents) train_tagger(name_tagger + '_' + NOMWE_TEXT, tagged_cess_nomwe) print "\nAll taggers trained in", timer.get_time(), "seconds" # Load tagger self.uni = utilities.load_pickle(complete_names[0], TAGGER_EXTENSION, TAGGER_PATH) self.bi = utilities.load_pickle(complete_names[1], TAGGER_EXTENSION, TAGGER_PATH) self.tri = utilities.load_pickle(complete_names[2], TAGGER_EXTENSION, TAGGER_PATH)
def compute(self):
    """ Estimate (without any guarantee if upper or lower bound) by
        searching over ReLU signs of each layer.

        Every linear layer is decomposed with an SVD, one subproblem is
        built per ReLU from the neighbouring decompositions, each
        subproblem is solved with sl.optim_nn_pca_greedy, and the results
        are multiplied together.

    RETURNS:
        the product of the subproblem values (also stored in self.value)
    """
    timer = utils.Timer()

    # Step 1: split up the network
    # f(x) matches r/(LR)+Lx/
    # J(x) matches r/(L Lambda)+L/

    # One SVD per linear layer; when a c_vector is given the last layer's
    # weight is first multiplied by it (as a row vector).
    layers = self.network.fcs
    svds = []
    for layer in layers:
        weight = layer.weight
        if layer == layers[-1] and self.c_vector is not None:
            c_row = torch.tensor(self.c_vector).view(1, -1)
            c_row = c_row.type(self.network.dtype)
            weight = c_row @ weight
        svds.append(torch.svd(weight))

    # Step 2: one (Left, Right) subproblem per ReLU
    num_relus = len(layers) - 1
    last_relu = num_relus - 1
    subproblems = []
    for idx in range(num_relus):
        right_svd = svds[idx]
        left_svd = svds[idx + 1]
        u_i, sigma_i, _ = right_svd
        _, sigma_ip1, v_ip1 = left_svd

        # Singular values are square-rooted except on the two outer
        # sides (right side of the first ReLU, left side of the last).
        if idx != last_relu:
            sigma_ip1 = torch.sqrt(sigma_ip1)
        if idx != 0:
            sigma_i = torch.sqrt(sigma_i)

        left = (torch.diag(sigma_ip1) @ v_ip1.t()).data
        right = (u_i @ torch.diag(sigma_i)).data
        subproblems.append((left, right))

    # Step 3: solve each of the subproblems.
    # The dual norm is looked up but not used below (norm_ord is fixed
    # to 2); the lookup still raises KeyError for an unknown primal_norm.
    dual_norm = {'linf': 1, 'l2': 2, 'l1': np.inf}[self.primal_norm]
    lips = []
    for left, right in subproblems:
        solution = sl.optim_nn_pca_greedy(left, right, verbose=False,
                                          use_tqdm=False, norm_ord=2)
        lips.append(solution[0])

    self.value = utils.prod(lips)
    self.compute_time = timer.stop()
    return self.value
def compute(self):
    """ Computes a global lipschitz bound for this network by multiplying
        self.norm_fxn of the c_vector with self.norm_fxn of every
        fully-connected weight matrix.

    RETURNS:
        the product of all those norms (also stored in self.value)
    """
    timer = utils.Timer()

    running_norm = self.norm_fxn(self.c_vector)
    layer_norms = [self.norm_fxn(fc.weight) for fc in self.network.fcs]
    for layer_norm in layer_norms:
        running_norm *= layer_norm

    self.value = running_norm
    self.compute_time = timer.stop()
    return running_norm
def run(self):
    """
    Run self.args.iterations generations.

    For each generation the fitness is calculated and sorted (timed, with
    the time accumulated in self.iteration_time), the best result is
    printed and the next generation is produced with self.mate().
    """
    self.iteration_time = 0
    for generation in range(self.args.iterations):
        self.iteration = generation

        stopwatch = utilities.Timer()
        with stopwatch:
            self.calculate_fitness()
            self.sort_by_fitness()
        self.iteration_time += stopwatch.duration_in_seconds()

        self.print_best()
        self.mate()
def compute(self):
    """ Fast lip is just interval bound propagation through backprop.

        Runs HBoxIA forward and backward, takes the elementwise largest
        magnitude of the resulting gradient box (stored in
        self.worst_case_vec) and returns its dual norm.

    RETURNS:
        the dual norm of self.worst_case_vec (also stored in self.value)
    """
    timer = utils.Timer()

    bounds = HBoxIA(self.network, self.domain, self.c_vector)
    bounds.compute_forward()
    bounds.compute_backward()
    grad_box = bounds.gradient_range

    # Worst case vector is max([abs(lo), abs(hi)])
    low_magnitude = abs(grad_box.box_low)
    high_magnitude = abs(grad_box.box_hi)
    self.worst_case_vec = np.maximum(low_magnitude, high_magnitude)

    # And take dual norm of this
    ord_for_primal = {'linf': 1, 'l1': np.inf, 'l2': 2}
    value = np.linalg.norm(self.worst_case_vec,
                           ord=ord_for_primal[self.primal_norm])

    self.value = value
    self.compute_time = timer.stop()
    return value
def build_gurobi_squire(self):
    """ Builds the gurobi squire.

        self.preact is either an already computed HBoxIA, which is used
        as is, or one of self.VALID_PREACTS, in which case a fresh HBoxIA
        is computed forward and backward with that technique.

    RETURNS:
        (squire, timer): the object returned by build_gurobi_model and
        the utils.Timer started here after the assertions; the timer is
        not stopped in this method.
    """
    assert self.primal_norm in ['linf', 'l1']  # Meaning we want max ||grad(f)||_*
    has_precomputed_bounds = isinstance(self.preact, HBoxIA)
    assert (self.preact in self.VALID_PREACTS or has_precomputed_bounds)

    timer = utils.Timer()

    # Step 1: Build the pre-ReLU and pre-switch hyperboxes
    if isinstance(self.preact, HBoxIA):
        pre_bounds = self.preact
    else:
        pre_bounds = HBoxIA(self.network, self.domain, self.c_vector)
        pre_bounds.compute_forward(technique=self.preact)
        pre_bounds.compute_backward(technique=self.preact)

    squire = build_gurobi_model(self.network, pre_bounds, self.primal_norm,
                                verbose=self.verbose)
    return squire, timer
def compute(self, num_points=1000):
    """ Computes maximum of dual norm of gradients of random points.
        Can be called multiple times and will only improve: the stored
        maximum (norm, point and gradient) is replaced only when the new
        sample has a larger norm, and the elapsed time is added to
        self.compute_time.

    ARGS:
        num_points: forwarded to self.network.random_max_grad
    RETURNS:
        the largest norm seen so far (also stored in self.value)
    """
    timer = utils.Timer()

    dual_by_primal = {'l1': np.inf, 'l2': 2, 'linf': 1}
    sample = self.network.random_max_grad(self.domain, self.c_vector,
                                          num_points,
                                          pnorm=dual_by_primal[self.primal_norm])

    is_improvement = (self.max_norm is None
                      or sample['norm'] > self.max_norm)
    if is_improvement:
        self.max_norm = sample['norm']
        self.max_point = sample['point']
        self.max_grad = sample['grad']
    self.value = self.max_norm  # redundancy here

    # Accumulate time over repeated calls, starting from zero
    time_so_far = 0 if self.compute_time is None else self.compute_time
    self.compute_time = time_so_far + timer.stop()
    return self.value
def compute(self, preact_method='naive_ia', tighter_relu=False):
    """ Solves the LP relaxation of the LipMIP problem.

        Builds the LipMIP gurobi squire, turns it into an LP with
        lp_ify_model, optimizes it and stores the objective value.

    ARGS:
        preact_method: passed to lm.LipMIP as `preact`
        tighter_relu: passed to squire.lp_ify_model
    RETURNS:
        the objective value of the optimized model (also in self.value)
    """
    # NOTE(review): this timer is replaced by the one returned from
    # build_gurobi_squire below, so compute_time is measured from that
    # later timer -- confirm which start point is intended.
    timer = utils.Timer()

    # Use the GurobiSquire / model constructor in lipMIP file
    lip_prob = lm.LipMIP(self.network, self.domain, self.c_vector,
                         primal_norm=self.primal_norm,
                         preact=preact_method)
    squire, timer = lip_prob.build_gurobi_squire()

    # And then we'll just change any binary variables to continuous ones,
    # and optimize
    model = squire.lp_ify_model(tighter_relu=tighter_relu)
    model.optimize()

    # presumably Gurobi status codes INFEASIBLE (3) and INF_OR_UNBD (4)
    # -- TODO confirm
    if model.Status in (3, 4):
        print('INFEASIBLE')

    objective = model.getObjective()
    self.value = objective.getValue()
    self.compute_time = timer.stop()
    return self.value
def plot_all(): """ Plot scatter plots of all the combinations of the current metrics """ timer = u.Timer() print "\nPlotting all the combinations" #delete existing png files u.delete_files(GRAPH_PATH, ".png") import process_text as pt #do all the possible combination for i in range(0, len(pt.get_metrics_header())): for j in range(0, len(pt.get_metrics_header())): if i != j: print "ploting", i, "vs", j scatter_plot(i, j) print "\nAll graphs created in", timer.get_time()
# The number 3797 has an interesting property. Being prime itself, it is possible to continuously remove digits # from left to right, and remain prime at each stage: 3797, 797, 97, and 7. # Similarly we can work from right to left: 3797, 379, 37, and 3. # Find the sum of the only eleven primes that are both truncatable from left to right and right to left. # NOTE: 2, 3, 5, and 7 are not considered to be truncatable primes. # 1.35s, could be improved import utilities as u timer = u.Timer() def is_truncatable(prime): prime = str(prime) for i in range(1, len(prime)): # Remove digits from the left if not u.is_prime(int(prime[i:])): return False for i in range( len(prime), 0, -1 ): # If none of the above checks fail, remove digits from the right if not u.is_prime(int(prime[:i])): return False return True # We only reach this is we pass through both truncation directions primes = u.e_sieve(1000000)[4:] # Ignore primes noted in NOTE truncatable_primes = [] for p in primes:
def train_classifier(name, x_train, y_train): """ Using the caracteristics and the labels it will train the classifier and save it as a pickle file Args: name: name of the classifier x_train: metrics to train the classifier y_train: labels to train the classifier Returns: the classifier """ """ Classifiers info: http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html """ timer = u.Timer() print "\nTraining", name from sklearn.preprocessing import StandardScaler scaler = StandardScaler(copy=True, with_mean=True, with_std=True) from sklearn.feature_selection import SelectKBest select = SelectKBest() list_k = [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 ] ### This functions allows to try other classifiers def declare_NB(): from sklearn.naive_bayes import GaussianNB naive_bayes = GaussianNB() steps = [("scaler", scaler), ('feature_selection', select), ('naive_bayes', naive_bayes)] parameters = dict(feature_selection__k=list_k) return steps, parameters def declare_SVM(): from sklearn.svm import SVC SVM = SVC() steps = [("scaler", scaler), ('feature_selection', select), ('SVM', SVM)] list_C = [1, 2, 3, 4, 5, 10, 100, 1000, 10000] parameters = dict(feature_selection__k=list_k, SVM__kernel=["rbf"], SVM__C=list_C) return steps, parameters, list_C def declare_adaboost(): from sklearn.ensemble import AdaBoostClassifier adaboost = AdaBoostClassifier() steps = [("scaler", scaler), ('feature_selection', select), ('adaboost', adaboost)] parameters = dict(feature_selection__k=list_k) return steps, parameters def declare_Decision_tree(): from sklearn import tree decision_tree = tree.DecisionTreeClassifier() steps = [("scaler", scaler), ('feature_selection', select), ('decision_tree', decision_tree)] min_samp_list = [20, 15, 10, 8, 6, 4] parameters = dict(feature_selection__k=list_k, decision_tree__min_samples_split=min_samp_list) return steps, parameters, min_samp_list def declare_Random_forest(): from sklearn.ensemble 
import RandomForestClassifier random_forest = RandomForestClassifier() steps = [("scaler", scaler), ('feature_selection', select), ('random_forest', random_forest)] min_samp_list = [20, 15, 10, 8, 6, 4] parameters = dict(feature_selection__k=list_k, random_forest__min_samples_split=min_samp_list) return steps, parameters, min_samp_list #Use the apropiate algorithm if name == name_naive_bayes: steps, parameters = declare_NB() elif name == name_SVM: steps, parameters, list_C = declare_SVM() elif name == name_adaBoost: steps, parameters = declare_adaboost() elif name == name_decision_tree: steps, parameters, min_samp_list = declare_Decision_tree() elif name == name_random_forest: steps, parameters, min_samp_list = declare_Random_forest() from sklearn.cross_validation import ShuffleSplit cv = ShuffleSplit(len(x_train), n_iter=10, test_size=0.1, random_state=0) from sklearn.pipeline import Pipeline pipeline = Pipeline(steps) from sklearn.grid_search import GridSearchCV #Scoring options: # accuracy, f1_weighted, r2, average_precision clf = GridSearchCV(pipeline, cv=cv, param_grid=parameters) #, scoring="f1_weighted") clf.fit(x_train, y_train) def report_NB(): import data_analisis list_mean = [] for param, mean_score, cv_scores in clf.grid_scores_: list_mean.append(mean_score) data_analisis.scatter_plot_from_lists(list_k, list_mean, "NB accuracy by K variables", Algorithms_path, xlabel="Num variables", ylabel="Accuracy") def report_Adaboost(): import data_analisis list_mean = [] for param, mean_score, cv_scores in clf.grid_scores_: list_mean.append(mean_score) data_analisis.scatter_plot_from_lists( list_k, list_mean, "Adaboost accuracy by K variables", Algorithms_path, xlabel="Num variables", ylabel="Accuracy") def report_SVM(): size_k = len(list_k) size_c = len(list_C) print "k=", size_k, "c=", size_c matrix = [[0 for x in range(size_c)] for y in range(size_k)] i = 0 j = 0 last_value_C = list_C[0] for param, mean_score, cv_scores in clf.grid_scores_: if param["SVM__C"] 
!= last_value_C: i += 1 j = 0 last_value_C = param["SVM__C"] #print param, "mean=", mean_score, "i=", i, "j=", j matrix[j][i] = mean_score j += 1 import numpy as np header = [""] + list_C matrix = np.c_[list_k, matrix] u.save_to_csv("SVM.csv", Algorithms_path, matrix, header) u.change_decimal_separator("SVM.csv", Algorithms_path) def report_decision_tree(): size_k = len(list_k) size_min_samp = len(min_samp_list) print "k=", size_k, "min_samples=", size_min_samp matrix = [[0 for x in range(size_min_samp)] for y in range(size_k)] i = 0 j = 0 last_value_min_samples = min_samp_list[0] for param, mean_score, cv_scores in clf.grid_scores_: if param[ "decision_tree__min_samples_split"] != last_value_min_samples: i += 1 j = 0 last_value_min_samples = param[ "decision_tree__min_samples_split"] #print param, "mean=", mean_score, "i=", i, "j=", j matrix[j][i] = mean_score j += 1 import numpy as np header = [""] + min_samp_list matrix = np.c_[list_k, matrix] u.save_to_csv("DecisionTree.csv", Algorithms_path, matrix, header) u.change_decimal_separator("DecisionTree.csv", Algorithms_path) def report_random_forest(): size_k = len(list_k) size_min_samp = len(min_samp_list) print "k=", size_k, "min_samples=", size_min_samp matrix = [[0 for x in range(size_min_samp)] for y in range(size_k)] i = 0 j = 0 last_value_min_samples = list_k[0] for param, mean_score, cv_scores in clf.grid_scores_: if param["feature_selection__k"] != last_value_min_samples: i += 1 j = 0 last_value_min_samples = param["feature_selection__k"] #print param, "mean=", mean_score, "i=", i, "j=", j matrix[i][j] = mean_score j += 1 import numpy as np header = [""] + min_samp_list matrix = np.c_[list_k, matrix] u.save_to_csv("RandomForest.csv", Algorithms_path, matrix, header) u.change_decimal_separator("RandomForest.csv", Algorithms_path) #Use the apropiate algorithm if name == name_naive_bayes: report_NB() elif name == name_SVM: report_SVM() elif name == name_adaBoost: report_Adaboost() elif name == 
name_decision_tree: report_decision_tree() elif name == name_random_forest: report_random_forest() print "\n\nBest estimator", clf.best_estimator_ print "\n\nBest score", clf.best_score_ print "Trained in", timer.get_time() from process_text import get_metrics_header final_feature_indices = clf.best_estimator_.named_steps[ "feature_selection"].get_support(indices=True) final_feature_list = [ get_metrics_header()[i] for i in final_feature_indices ] print "Selected vars:", final_feature_list u.save_pickle(clf, name, path=ML_path) return clf
def plot_KS_graphs(scores,
                   labels,
                   spline_method,
                   splines,
                   outdir,
                   plotname,
                   title="",
                   showplots=True):
    """Compute the Kolmogorov-Smirnov (KS) error and optionally plot it.

    Scores and labels are sorted by score, accumulated and divided by the
    number of samples; the KS error is the largest absolute difference
    between the two cumulative curves. When showplots is true a figure
    with four panels is saved as <outdir>/<plotname>_KS.pdf.

    Args:
        scores: scores, converted with ensure_numpy
        labels: labels, converted with ensure_numpy
        spline_method, splines: forwarded to compute_accuracy
        outdir: directory of the output figure
        plotname: base name of the output figure
        title: first line of the figure title
        showplots: forwarded to compute_accuracy; also enables the figure
    Returns:
        the maximum KS error
    """
    tks = utils.Timer("Plotting graphs")

    # Change to numpy, then this will work
    scores = ensure_numpy(scores)
    labels = ensure_numpy(labels)

    # Sort the data by score
    order = scores.argsort()
    scores = scores[order]
    labels = labels[order]

    # Accumulate and normalize by dividing by num samples
    nsamples = utils.len0(scores)
    integrated_scores = np.cumsum(scores) / nsamples
    integrated_accuracy = np.cumsum(labels) / nsamples
    percentile = np.linspace(0.0, 1.0, nsamples)
    fitted_accuracy, fitted_error = compute_accuracy(scores,
                                                     labels,
                                                     spline_method,
                                                     splines,
                                                     outdir,
                                                     plotname,
                                                     showplots=showplots)

    # Work out the Kolmogorov-Smirnov error
    gap = np.absolute(integrated_scores - integrated_accuracy)
    KS_error_max = np.amax(gap)

    if showplots:
        # Set up the graphs
        f, ax = plt.subplots(1, 4, figsize=(20, 5))
        size = 0.2
        f.suptitle(
            title +
            f"\nKS-error = {utils.str(float(KS_error_max)*100.0)}%, "
            f"Probability={utils.str(float(integrated_accuracy[-1])*100.0)}%",
            fontsize=18,
            fontweight="bold")

        percent = 100.0 * percentile
        # One entry per panel:
        # (tag, x values, (y values, legend label) x 2, xlabel, ylabel).
        # A ylabel of None leaves the y axis unlabelled.
        panels = [
            # (a) cumulative score and probability vs percentile
            ('(a)', percent,
             (integrated_scores, 'Cumulative Score'),
             (integrated_accuracy, 'Cumulative Probability'),
             "Percentile", "Cumulative Score / Probability"),
            # (b) the same curves vs cumulative score
            ('(b)', integrated_scores,
             (integrated_scores, 'Cumulative Score'),
             (integrated_accuracy, "Cumulative Probability"),
             "Cumulative Score", None),
            # (c) score and fitted probability vs percentile
            ('(c)', percent,
             (scores, 'Score'),
             (fitted_accuracy, "Probability"),
             "Percentile", "Score / Probability"),
            # (d) score and fitted probability vs score
            ('(d)', scores,
             (scores, "Score"),
             (fitted_accuracy, 'Probability'),
             "Score", None),
        ]
        for axis, (tag, xs, first, second, xlabel, ylabel) in zip(ax, panels):
            for ys, curve_label in (first, second):
                axis.plot(xs, ys, linewidth=3, label=curve_label)
            axis.set_xlabel(xlabel, fontsize=16, fontweight="bold")
            if ylabel is not None:
                axis.set_ylabel(ylabel, fontsize=16, fontweight="bold")
            axis.legend(fontsize=13)
            # increase or decrease y as needed
            axis.set_title(tag, y=-size, fontweight="bold", fontsize=16)
            axis.grid()

        plt.savefig(os.path.join(outdir, plotname) + '_KS.pdf',
                    bbox_inches="tight")
        plt.close()

    tks.free()
    return KS_error_max
def __init__(self, pygame, screen, road, artifact_id, artifact_def):
    """Create the traffic light artifact, starting on red.

    The parent class is initialised with the green image; afterwards the
    light is switched to red, a timer is created with u.Timer(pygame, 3)
    and the image matching the current state is loaded.
    """
    green_png = 'images/sign_traffic_light_green_32.png'
    red_png = 'images/sign_traffic_light_red_32.png'
    # Index 0 is green and index 1 is red, so the boolean self.red can
    # be used directly as an index into this list.
    self.image_files = [green_png, red_png]

    super().__init__(pygame, screen, road, self.image_files[0],
                     artifact_id, artifact_def)

    self.red = True
    self.timer = u.Timer(pygame, 3)
    self.image = self.load_image(self.image_files[self.red])
:Author: Felipe Jared Guerrero Moreno <*****@*****.**> :Date: Created on Jul 14, 2017 :Description: This module sets up communication with the electrical boards. ''' import datetime import time import numpy import struct import threading import math #import data_packet_generator import utilities import serial requestTimer = utilities.Timer() class motherBoardDataPackets: def __init__(self, serialObject): ''' Sends messages to the Motherboard when necessary. Sends in this format: XXXY ZZZZ frameID = XXX rtr = Y payload = ZZZZ ''' self.motherCom = serialObject