def dataMaker(split=.9):
    corpus, token_frequency = xml.xml_parser()
    corpus = five_occurence_pruner(corpus, token_frequency)
    print "pre pruning zeros"
    print len(corpus)
    # pruning: drop messages with no tokens left
    new_messages = []
    for message in corpus:
        if len(message.tokens) != 0:
            new_messages.append(message)
    corpus = new_messages
    print "post pruning"
    print len(corpus)
    vocabulary = get_vocabulary(corpus)
    corpus = final_instance_assembler(corpus, vocabulary)
    corpus = context_gather(corpus)
    print "Packaging corpus"
    corpus = Corpus(corpus, split)
    return corpus
def xml_to_tsv(xml_path, tsv_path):
    """
    Wrapper function for xml_parser and xml_stream_handler.
    """
    stream_handler = xml_handler(tsv_path)
    parser = xml_parser(stream_handler)
    parser.parse_xml_file(xml_path)
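# A minimal usage sketch of the wrapper above. The file names are placeholders,
# and xml_handler / xml_parser are assumed to come from the surrounding module.
if __name__ == "__main__":
    xml_to_tsv("dump.xml", "dump.tsv")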
def generate_batches(inp):
    arr = xml_parser.xml_parser('gen_batches_Params.xml', inp)
    n = len(arr)
    if n != 3:
        return False
    # print(arr)
    try:
        list(gen_batches(arr[0], arr[1]))
    except ValueError:
        return False
    return True
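# For reference, a small illustration of what the call above exercises, assuming
# gen_batches is sklearn.utils.gen_batches (the driver only imports it elsewhere).
from sklearn.utils import gen_batches

# gen_batches(n, batch_size) yields slice objects that partition range(n):
print(list(gen_batches(7, 3)))  # [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]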
def TreeRegress(inp):
    arr = xml_parser.xml_parser('TreeRegressor_Params.xml', inp)
    n = len(arr)
    if n != 15:
        return False
    print(arr)
    n_model = arr[0]
    rng = np.random.RandomState(1)
    # value for max_depth
    if arr[5] != None:
        arr[5] = 2 * np.random.randint(1, arr[1])
    if arr[7] == float(int(arr[7])):
        arr[7] = int(arr[7])
    # value for max_features
    if arr[9] == 'int':
        arr[9] = np.random.randint(1, arr[1])
    elif arr[9] == 'float':
        arr[9] = random.uniform(0, 1) * arr[1]
    elif arr[9] == 'None':
        arr[9] = None
    if arr[10] == 'None':
        arr[10] = None
    try:
        train_X, train_y = make_regression(n_samples=arr[0], n_features=arr[1],
                                           n_informative=arr[2])
        print("done1")
    except ValueError:
        print("error1")
        return False
    try:
        random_forest = RandomForestRegressor(n_estimators=arr[3],
                                              criterion=arr[4],
                                              max_depth=arr[5],
                                              min_samples_split=arr[6],
                                              min_samples_leaf=arr[7],
                                              min_weight_fraction_leaf=arr[8],
                                              max_features=arr[9],
                                              max_leaf_nodes=arr[10],
                                              min_impurity_decrease=arr[11],
                                              bootstrap=arr[12],
                                              oob_score=arr[13],
                                              warm_start=arr[14])
        print("done2")
    except ValueError:
        print("error2")
        return False
    try:
        random_forest.fit(train_X, train_y)
    except ValueError:
        print("error3")
        return False
    return True
def minibatch_kmeans(inp):
    arr = xml_parser.xml_parser('minibatch_kmeans_Params.xml', inp)
    n = len(arr)
    # print(n)
    if n != 15:
        return False
    try:
        if arr[2] == 0:
            X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                       n_informative=2, n_classes=arr[3])
        elif arr[2] == 1:
            X, y = dataset_fixed_cov(arr[0], arr[1])
        elif arr[2] == 2:
            X, y = dataset_cov(arr[0], arr[1])
        # else:
        #     X, y = make_circles(arr[0])
    except ValueError:
        return False
    print(arr)
    X = StandardScaler().fit_transform(X)
    if arr[9] == 'None':
        arr[9] = None
    else:
        arr[9] = 2
    if arr[12] == 0:
        arr[12] = None
    elif arr[12] < arr[3]:
        arr[12] = arr[3]
    try:
        MBKM = cluster.MiniBatchKMeans(n_clusters=arr[3], init=arr[4],
                                       max_iter=arr[5], batch_size=arr[6],
                                       verbose=arr[7], compute_labels=arr[8],
                                       random_state=arr[9], tol=arr[10],
                                       max_no_improvement=arr[11],
                                       init_size=arr[12], n_init=arr[13],
                                       reassignment_ratio=arr[14])
        MBKM.fit(X)
        print("Done!")
    except ValueError:
        return False
    return True
def generate_data_classification(inp):
    arr = xml_parser.xml_parser('make_classification_Params.xml', inp)
    n = len(arr)
    if n != 10:
        return False
    if arr[7] == 'False':
        arr[7] = False
    if arr[9] == 'None':
        arr[9] = None
    else:
        arr[9] = 2
    # print(arr)
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(10)
    try:
        X, y = make_multilabel_classification(n_samples=arr[0], n_features=arr[1],
                                              n_classes=arr[2], n_labels=arr[3],
                                              length=arr[4], allow_unlabeled=arr[5],
                                              sparse=arr[6], return_indicator=arr[7],
                                              return_distributions=arr[8],
                                              random_state=arr[9])
    except ValueError as err:
        # print("value error: ")
        # print(sys.exc_info())
        return False
    except ZeroDivisionError as err:
        # print("zero division error: ")
        # print(err)
        return False
    except:
        # print("Unexpected error:")
        # print(sys.exc_info()[0])
        raise
    finally:
        signal.alarm(0)
        # print("After: %s" % time.strftime("%M:%S"))
    return True
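# The driver above bounds the call with SIGALRM. The real timeout_handler is not
# shown in this excerpt; a plausible minimal handler (an assumption, not the
# project's actual code) simply raises so the surrounding try/except can bail out.
import signal

def timeout_handler(signum, frame):
    # raised when the 10-second alarm set before make_multilabel_classification fires
    raise TimeoutError("data generation exceeded the alarm deadline")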
def logistic_regression(inp):
    arr = xml_parser.xml_parser('logistic_regression_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    if arr[5] != 'newton-cg':
        return False
    try:
        X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                   n_informative=arr[2], n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    t = 0
    if arr[8] == 0.0 and arr[13] == 'multinomial' and arr[6] == 'l2':
        t = 1
    else:
        t = 2
    try:
        clf = LogisticRegression(penalty=arr[6], dual=arr[7], tol=arr[8],
                                 C=arr[9], fit_intercept=arr[10],
                                 intercept_scaling=arr[11], solver=arr[5],
                                 n_jobs=arr[4], max_iter=arr[12],
                                 multi_class=arr[13])
        clf.fit(X, y)
        # print("here1")
    except ValueError:
        # print("here2")
        return False
    except IOError:
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
def GaussianProcess(inp):
    arr = xml_parser.xml_parser('Gaussian_Proc_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    try:
        X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                   n_informative=arr[2], n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    print(arr)
    kernel = arr[4] * RBF([1.0 for i in range(arr[5])])
    if arr[6] == 'None':
        arr[6] = None
    if arr[11] == 'None':
        arr[11] = None
    else:
        arr[11] = 2
    # X = StandardScaler().fit_transform(X)
    try:
        clf = GaussianProcessClassifier(kernel=kernel, optimizer=arr[6],
                                        n_restarts_optimizer=arr[7],
                                        max_iter_predict=arr[8],
                                        warm_start=arr[9], copy_X_train=arr[10],
                                        random_state=arr[11],
                                        multi_class=arr[12], n_jobs=arr[13])
        clf.fit(X, y)
        # print("here1")
    except ValueError:
        # print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
def logistic_regression(inp):
    arr = xml_parser.xml_parser('logistic_regression_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    try:
        X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                   n_informative=arr[2], n_classes=arr[3])
    except ValueError:
        # pass
        # print("here")
        return False
    print(arr)
    # print("here")
    # try:
    #     with parallel_backend(backend):
    #         print("here0")
    try:
        clf = LogisticRegression(penalty=arr[6], dual=arr[7], tol=arr[8],
                                 C=arr[9], fit_intercept=arr[10],
                                 intercept_scaling=arr[11], solver=arr[5],
                                 n_jobs=arr[4], max_iter=arr[12],
                                 multi_class=arr[13])
        clf.fit(X, y)
        # print("here1")
    except ValueError:
        # pass
        # print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
def open_cad(self, path, isthread=False):
    # change to False to use threads, needs checking the concurrency!
    if not isthread:
        thread = Thread(target=self.open_cad, args=(path, True,))
        thread.start()
        return
    if not os.path.isfile(path):
        print path, "does not exist!"
        return
    self.gui.clean_status = False
    self.data.printStatus("DXF import - parse file ...")
    self.data.project.layout.path = path
    # choose importer based on extension
    ext = path.split(".")[-1]
    if ext == "xml":
        parser = xml_parser.xml_parser()
    if ext == "dxf":
        parser = dxf_parser.dxf_parser()
    # start parser
    parser.parse(path)
    self.data.project.layout.viewports = parser.viewports
    self.data.active_viewport = parser.viewports[parser.viewports.keys()[0]]
    self.colorizeLayers()
    # execute post stuff in gtk main thread
    gobject.idle_add(self.gui.updateViewportList)
    self.data.printStatus("DXF import - done")
    self.gui.clean_status = True
def main():
    filepath = sys.argv[1]  # job..out file
    if not os.path.isfile(filepath):
        print("File path {} does not exist. Exiting...".format(filepath))
        sys.exit()
    f = open(sys.argv[2], "w")  # output file
    fp = open(filepath, "r")
    cnt = 0
    is_considering = False
    path = ""
    time = ""
    input_val = ""
    for line in fp:
        if "The path" in line and cnt == 0:
            line = line.strip()
            is_considering = True
            path = line.split(": ")[1]
            cnt += 1
        elif is_considering and cnt == 1:
            cnt += 1
        elif is_considering and cnt == 2:
            line = line.strip()
            time = line
            cnt += 1
        elif is_considering and cnt == 3:
            inp = line.strip()
            cnt = 0
            is_considering = False
            arr = xml_parser.xml_parser(sys.argv[3], inp)  # input xml file
            if len(arr) != int(sys.argv[4]):  # num. parameters in xml
                continue
            else:
                f.write(str(arr) + "," + time + "," + path + "\n")
                path = ""
                time = ""
                input_val = ""
                cnt += 1
    f.close()
def __init__(self, parent=None):
    """
    Constructor
    """
    hw_info_in_xml()
    QMainWindow.__init__(self, parent)
    self.setupUi(self)
    hw_info = xml_parser("./resources/hw.xml")
    stack = [hw_info.first_node_key]
    hw_info = hw_info.hw_info
    stack1 = [self.treeWidget]
    while len(stack) != 0:
        t = stack.pop()
        try:
            hw_info[t]["description"]
        except KeyError:
            try:
                t = stack.pop()
            except IndexError:
                pass
        item = QtGui.QTreeWidgetItem(stack1.pop())
        item.setData(1, 1, list(t))
        # print list(t)
        # for i in item.data(1, 1).toList():
        #     print i.toString()
        try:
            item.setText(0, hw_info[t]["description"])
        except KeyError:
            pass
        tmp = hw_info[t]['child_nodes']
        tmp.reverse()
        for i in tmp:
            stack.append(i)
            stack1.append(item)
    # self.sys_info_view.setUrl(QUrl("file:///home/boss/py/resources/system-info.html"))
    self.treeWidget.show()
def DecisionTree(inp):
    arr = xml_parser.xml_parser('Decision_Tree_Classifier_Params.xml', inp)
    n = len(arr)
    if n != 18:
        return False
    try:
        X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                   n_informative=arr[2], n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    print(arr)
    if arr[6] == 'None':
        arr[6] = None
    else:
        arr[6] = random.randint(1, 100)
    if arr[7] == int(arr[7]):
        arr[7] = int(arr[7])
    else:
        arr[7] = arr[7] / 100.0
    if arr[8] == int(arr[8]):
        arr[8] = int(arr[8])
    else:
        arr[8] = arr[8] / 50.0
    if arr[10] == 'val':
        if arr[11] == int(min(arr[11], arr[1])):
            arr[11] = int(arr[11])
        else:
            arr[11] = arr[11] / 10.0
    elif arr[10] == 'None':
        arr[11] = None
    else:
        arr[11] = arr[10]
    if arr[12] == 'None':
        arr[12] = None
    else:
        arr[12] = random.randint(1, 10)
    if arr[13] == 'None':
        arr[13] = None
    else:
        arr[13] = random.randint(1, 100)
    if arr[16] == 'None':
        arr[16] = None
    elif arr[16] == 'weighted':
        weight_lst = {}
        for class_num in range(arr[3]):
            weight_lst[class_num] = random.randint(1, 5)
        arr[16] = weight_lst
    # X = StandardScaler().fit_transform(X)
    try:
        clf = DecisionTreeClassifier(criterion=arr[4], splitter=arr[5],
                                     max_depth=arr[6],
                                     min_samples_split=arr[7],
                                     min_samples_leaf=arr[8],
                                     min_weight_fraction_leaf=arr[9],
                                     max_features=arr[11],
                                     random_state=arr[12],
                                     max_leaf_nodes=arr[13],
                                     min_impurity_decrease=arr[14],
                                     class_weight=arr[16],
                                     presort=arr[17])
        clf.fit(X, y)
        print("here1")
    except ValueError:
        print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
This is the main file. Lines 29 to 50 need to be run once; those lines compute the
pCTR for each campaign and the data statistics. For the first run, uncomment those
lines, and comment them out again once the first run is finished.
"""
from Data_statistics import data_statistics_test
from Data_statistics import data_statistics_train
from CTR_estimation import ctr_estimate_basedcampaign
from CTR_estimation import ctr_Global_estimate
from Auctions import auction_based_adviser
from Auctions import auction_adviser_less
from Generate_statistics import generate_statistics_based_adviser
from Generate_statistics import generate_statistics_adviser_Less
import pandas as pd
from xml_parser import xml_parser

DSP_list, mode = xml_parser('../Conf_file/Conf_file_mode2.xml')  # read the configuration file
#############################################################
############################################################
###########################################################
# campaings = ['2997', '2821', '2261', '2259', '3358', '3386', '3427', '3476', '1458']
# statistics_train = pd.DataFrame()
# statistics_test = pd.DataFrame()
# data_test_all = pd.read_csv("../all/test.log.txt", header=0, sep='\t', index_col=False, engine='python')  # load all test sets merged in one file
# global_pCTR = pd.DataFrame()
# global_pCTR['payprice'] = data_test_all['payprice']
# for campaign in campaings:
#     data_train = pd.read_csv("../" + campaign + "/train.log.txt", header=0, sep='\t', index_col=False, engine='python')  # load train data
#     data_test = pd.read_csv("../" + campaign + "/test.log.txt", header=0, sep='\t', index_col=False, engine='python')  # load test data
#     pCTR = ctr_estimate_basedcampaign(campaign, data_train, data_test)  # estimate the CTR for the campaign
def on_treeWidget_itemClicked(self, item, column):
    """
    Slot documentation goes here.
    """
    t = []
    [t.append(str(i.toString())) for i in item.data(1, 1).toList()]
    hw_info = xml_parser("./resources/hw.xml").hw_info
    node = hw_info[tuple(t)]
    # print node
    # print dir(self.tableWidget)
    # self.tableWidget.clear()
    row_count = self.tableWidget.rowCount()
    while row_count != -1:
        self.tableWidget.removeRow(row_count)
        row_count = row_count - 1
    row = 0
    for i, j in node.iteritems():
        # print i
        # print j
        if i == "configuration":
            self.tableWidget.insertRow(row)
            item = QtGui.QTableWidgetItem(i + ' :')
            self.tableWidget.setItem(row, 0, item)
            row = row + 1
            for id, value in j.iteritems():
                self.tableWidget.insertRow(row)
                item = QtGui.QTableWidgetItem(id)
                item1 = QtGui.QTableWidgetItem(value)
                self.tableWidget.setItem(row, 0, item)
                self.tableWidget.setItem(row, 1, item1)
                row = row + 1
        elif i == "capabilities":
            self.tableWidget.insertRow(row)
            # print dir(self.tableWidget)
            item = QtGui.QTableWidgetItem(i + ' :')
            self.tableWidget.setItem(row, 0, item)
            row = row + 1
            for k in j:
                item1 = QtGui.QTableWidgetItem(k)
                self.tableWidget.insertRow(row)
                self.tableWidget.setItem(row, 1, item1)
                row += 1
        elif i == "resources":
            self.tableWidget.insertRow(row)
            item = QtGui.QTableWidgetItem(i + ' :')
            self.tableWidget.setItem(row, 0, item)
            row = row + 1
            for id, value in j.iteritems():
                self.tableWidget.insertRow(row)
                item = QtGui.QTableWidgetItem(id)
                item1 = QtGui.QTableWidgetItem(value)
                self.tableWidget.setItem(row, 0, item)
                self.tableWidget.setItem(row, 1, item1)
                row = row + 1
        elif i == "child_nodes":
            pass
        else:
            self.tableWidget.insertRow(row)
            item = QtGui.QTableWidgetItem(i)
            item1 = QtGui.QTableWidgetItem(j)
            self.tableWidget.setItem(row, 0, item)
            self.tableWidget.setItem(row, 1, item1)
            row = row + 1
def run_driver(seed_input):
    inp_program_instr = FunctionCoverageRunner(input_program)
    mutation_fuzzer = MutationCoverageFuzzer(seed=seed_inputs,
                                             min_mutations=1,
                                             max_mutations=5)
    for i in range(NUM_ITER):
        mutation_fuzzer.runs(inp_program_instr, trials=TRIASL_EACH_ITER)
        for key in mutation_fuzzer.coverages_seen.keys():
            print("The path key is: " + str(key))
            max_int = mutation_fuzzer.coverages_seen[key].index(
                max(mutation_fuzzer.coverages_seen[key]))
            print("step: " + str(i + 1))
            print(max(mutation_fuzzer.coverages_seen[key]))
            print(mutation_fuzzer.population[key][max_int])
    # print("Best input and coverage overall!")
    # max_int = mutation_fuzzer.coverages_seen.index(max(mutation_fuzzer.coverages_seen))
    # print(max(mutation_fuzzer.coverages_seen))
    # print(mutation_fuzzer.population[max_int])

    # store the results!
    # based on the length of inputs!
    if INPUT_SIZE:
        f1 = open("complexity_driver_" + str(input_program).split(" ")[1] + ".csv", "w")
        for key in mutation_fuzzer.coverages_seen.keys():
            included_lines = []
            index = 0
            for inp_pop in mutation_fuzzer.population[key]:
                if len(inp_pop) not in included_lines:
                    avail_ind = [i for i in range(0, len(mutation_fuzzer.population[key]))
                                 if len(mutation_fuzzer.population[key][i]) == len(inp_pop)]
                    c1 = np.max([mutation_fuzzer.coverages_seen[key][i] for i in avail_ind])
                    f1.write(str(len(inp_pop)) + "," + str(c1) + "," + str(key) + "\n")
                    included_lines.append(len(inp_pop))
                index += 1
            if key in mutation_fuzzer.model_fit.keys():
                print("The path key is: " + str(key))
                print("The model is: " + str(mutation_fuzzer.model_fit[key]))
                print("The cluster is: " + str(mutation_fuzzer.path_cluster[key]))
            else:
                print("The path key is: " + str(key))
                print("no model for the above key")
        for clust in mutation_fuzzer.cluster_paths.keys():
            print(mutation_fuzzer.cluster_paths[clust])
        f1.close()
    # based on some parts of inputs
    else:
        f1 = open("complexity_driver_" + str(input_program).split(" ")[1] + ".csv", "w")
        for key in mutation_fuzzer.coverages_seen.keys():
            included_lines = {}
            index = 0
            for inp_pop in mutation_fuzzer.population[key]:
                arr = xml_parser.xml_parser(input_program_tree, inp_pop)
                # choose the size parameter (use a separate loop variable so the
                # population index is not clobbered)
                if len(arr) == NUM_PARAMETERS:
                    len_inp_pop = 1
                    for size_ind in SIZE_INDEX:
                        len_inp_pop *= arr[size_ind]
                else:
                    index += 1
                    continue
                if len_inp_pop not in included_lines:
                    c1 = mutation_fuzzer.coverages_seen[key][index]
                    f1.write(str(arr) + "," + str(len_inp_pop) + "," + str(c1) + "," + str(key) + "\n")
                    included_lines[len_inp_pop] = c1
                else:
                    c1 = mutation_fuzzer.coverages_seen[key][index]
                    max_val = included_lines[len_inp_pop]
                    if max_val < c1:
                        f1.write(str(arr) + "," + str(len_inp_pop) + "," + str(c1) + "," + str(key) + "\n")
                        included_lines[len_inp_pop] = c1
                index += 1
        f1.close()
def run(self, runner):
    """Run function(inp) while tracking coverage.

    If we reach new coverage, add inp to population
    and its coverage to population_coverage.
    """
    try:
        result, outcome = super(MutationCoverageFuzzer, self).run(runner)
    except TimeoutError as error:
        print("Caught an error!")
        return ""
    key_path = runner.coverage()[0]
    if Actual_Time:
        val_cost = self.time_cost
    else:
        val_cost = runner.coverage()[1]
    if key_path in self.removed_path:
        return ""
    self.new_coverage = val_cost
    self.num_inp += 1

    # Do fitting and clustering
    if INPUT_SIZE and DO_CLUSTERING and self.num_inp % STEPS_TO_DO_CLUSTERING == 0:
        # use a separate loop variable (kp) so key_path keeps referring to the
        # path of the current input
        for kp in self.coverages_seen.keys():
            if kp in self.updated_since_clustered.keys() and self.updated_since_clustered[kp] == False:
                continue
            X = [len(x) for x in self.population[kp]]
            y = [x for x in self.coverages_seen[kp]]
            if len(set(X)) >= DEGREE_TO_FIT + 1:
                vals, stats_res = P.polyfit(X, y, DEGREE_TO_FIT, full=True)
                self.model_fit[kp] = (vals, stats_res[0])
        path_orders = []
        for kp in self.model_fit.keys():
            if self.updated_since_clustered[kp] == False:
                path_orders.append(kp)
            else:
                self.updated_since_clustered[kp] = False
                path_orders.append(kp)
                self.eval_functions[kp] = [
                    self.model_fit[kp][0][1] * i_size + self.model_fit[kp][0][0]
                    for i_size in range(1, MAX_SIZE)
                ]
        eval_functions_array = np.array(
            [self.eval_functions[key] for key in self.eval_functions.keys()])
        if len(self.eval_functions.keys()) >= NUM_CLUSTERS:
            kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                            random_state=1).fit(eval_functions_array)
            self.cluster_paths = {}
            for i, clust in enumerate(kmeans.labels_):
                self.path_cluster[path_orders[i]] = clust
                if clust in self.cluster_paths.keys():
                    self.cluster_paths[clust].append(path_orders[i])
                else:
                    self.cluster_paths[clust] = [path_orders[i]]

    if DO_CLUSTERING and self.num_inp % STEPS_TO_KILL == 0:
        max_val = -1
        max_path = -1
        max_clust = -1
        for cluster in self.cluster_paths.keys():
            for x in self.cluster_paths[cluster]:
                if self.worst_costs[x] > max_val:
                    max_val = self.worst_costs[x]
                    max_path = x
                    max_clust = cluster
        # drop unmodelled paths whose worst cost is below the current maximum
        for kp in list(self.coverages_seen.keys()):
            if kp not in self.model_fit.keys():
                if self.worst_costs[kp] < max_val:
                    self.population.pop(kp, None)
                    self.coverages_seen.pop(kp, None)
                    self.worst_costs.pop(kp, None)
                    self.last_update.pop(kp, None)
                    self.removed_path.append(kp)

    # size limitation based on the number of arguments
    inp_num_args = self.inp.split(" ")
    is_interesting = True
    if not key_path in self.coverages_seen.keys():
        if len(self.inp) <= MAX_SIZE:
            self.coverages_seen[key_path] = [val_cost]
            self.population[key_path] = [self.inp]
            self.worst_costs[key_path] = val_cost
            self.last_update[key_path] = self.num_inp
            self.updated_since_clustered[key_path] = True
        else:
            is_interesting = False
    # this is based on the length of the input string
    elif (outcome == Runner.PASS and INPUT_SIZE
          and self.new_coverage > np.percentile(self.coverages_seen[key_path], PERCENTAGE_TO_KEEP)
          and len(self.inp) <= MAX_SIZE
          and len(self.inp) <= np.median(list(map(len, self.population[key_path])))
          + CONSTANT_FACTOR * np.sqrt(np.median(list(map(len, self.population[key_path]))))):
        self.population[key_path].append(self.inp)
        self.coverages_seen[key_path].append(self.new_coverage)
    # this is based on the arguments in the input
    elif (outcome == Runner.PASS and not INPUT_SIZE
          and self.new_coverage > np.percentile(self.coverages_seen[key_path], PERCENTAGE_TO_KEEP)
          and len(inp_num_args) <= NUM_PARAMETERS):
        arr = xml_parser.xml_parser(input_program_tree, self.inp)
        # choose the size parameter
        if len(arr) == NUM_PARAMETERS:
            len_inp_pop = 1
            for size_ind in SIZE_INDEX:
                len_inp_pop *= arr[size_ind]
        else:
            len_inp_pop = 0
            is_interesting = False
        if is_interesting and key_path not in self.len_inputs.keys():
            self.population[key_path].append(self.inp)
            self.coverages_seen[key_path].append(self.new_coverage)
            self.len_inputs[key_path] = len_inp_pop
        elif (is_interesting and len_inp_pop <= self.len_inputs[key_path]
              + CONSTANT_FACTOR * np.sqrt(self.len_inputs[key_path])):
            self.population[key_path].append(self.inp)
            self.coverages_seen[key_path].append(self.new_coverage)
            if len_inp_pop > self.len_inputs[key_path]:
                self.len_inputs[key_path] = len_inp_pop
        else:
            is_interesting = False
    # not interesting
    else:
        is_interesting = False

    if is_interesting and self.worst_costs[key_path] < val_cost:
        self.worst_costs[key_path] = val_cost
    if is_interesting:
        self.last_update[key_path] = self.num_inp
        self.updated_since_clustered[key_path] = True
    elif len(self.inp) > MAX_SIZE:
        return ""
    elif (not key_path in self.path_cluster.keys()
          and self.num_inp - self.last_update[key_path] > STEPS_TO_KILL
          and len(self.population) > MIN_NUM_PATH
          and self.worst_costs[key_path] <= np.percentile(list(self.worst_costs.values()), 50)):
        min_val = self.worst_costs[key_path]
        min_path = key_path
        for path_other in self.worst_costs.keys():
            if self.worst_costs[path_other] < min_val:
                min_val = self.worst_costs[path_other]
                min_path = path_other
        if key_path == min_path:
            self.population.pop(key_path, None)
            self.coverages_seen.pop(key_path, None)
            self.worst_costs.pop(key_path, None)
            self.last_update.pop(key_path, None)
            self.removed_path.append(key_path)
            return ""
        else:
            self.population.pop(min_path, None)
            self.coverages_seen.pop(min_path, None)
            self.worst_costs.pop(min_path, None)
            self.last_update.pop(min_path, None)
            self.removed_path.append(min_path)
    elif (key_path in self.path_cluster.keys() and ALLOWED_REMOVE_CLUSTER_PATH
          and self.num_inp - self.last_update[key_path] > STEPS_TO_KILL
          and len(self.cluster_paths[self.path_cluster[key_path]]) > MIN_NUM_PATH_PER_CLUST):
        clust = self.path_cluster[key_path]
        for x in self.cluster_paths[clust]:
            if self.worst_costs[x] > self.worst_costs[key_path]:
                self.cluster_paths[clust].remove(key_path)
                self.path_cluster.pop(key_path, None)
                self.model_fit.pop(key_path, None)
                self.updated_since_clustered.pop(key_path, None)
                self.eval_functions.pop(key_path, None)
                self.population.pop(key_path, None)
                self.coverages_seen.pop(key_path, None)
                self.worst_costs.pop(key_path, None)
                self.last_update.pop(key_path, None)
                self.removed_path.append(key_path)
                break

    if is_interesting == False:
        result = ""

    if len(self.population[key_path]) > MAX_POP_SIZE:
        if INPUT_SIZE:
            # keep the inputs with the highest cost per unit of input length
            indices = sorted(
                range(len(self.coverages_seen[key_path])),
                key=lambda i: self.coverages_seen[key_path][i] /
                (len(self.population[key_path][i])))[-MAX_POP_SIZE // 8:]
            self.coverages_seen_new = []
            self.population_new = []
            for index in indices:
                self.population_new.append(self.population[key_path][index])
                self.coverages_seen_new.append(self.coverages_seen[key_path][index])
            self.population[key_path] = self.population_new
            self.coverages_seen[key_path] = self.coverages_seen_new
        else:
            # keep a random sample plus the most recent inputs
            indices = []
            for k in range(MAX_POP_SIZE // 20):
                indices.append(random.randint(0, 7 * MAX_POP_SIZE // 8))
            self.coverages_seen_new = []
            self.population_new = []
            for index in indices:
                self.population_new.append(self.population[key_path][index])
                self.coverages_seen_new.append(self.coverages_seen[key_path][index])
            self.population_new = self.population_new + self.population[key_path][-MAX_POP_SIZE // 8:]
            self.coverages_seen_new = self.coverages_seen_new + self.coverages_seen[key_path][-MAX_POP_SIZE // 8:]
            self.population[key_path] = self.population_new
            self.coverages_seen[key_path] = self.coverages_seen_new
    return result
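# The run() method above layers cost tracking, polynomial fitting, clustering, and
# path pruning on top of the basic coverage-guided loop. For orientation only, a
# minimal sketch of the underlying keep-if-new-behaviour idea; every name here
# (fuzz_loop, mutate, run_target) is illustrative, not from the original code.
import random

def fuzz_loop(seeds, mutate, run_target, trials):
    """Minimal coverage-guided loop: keep an input only if it exercises a new path."""
    population = list(seeds)
    seen_paths = set()
    for _ in range(trials):
        inp = mutate(random.choice(population))
        path = frozenset(run_target(inp))  # run_target returns the covered locations
        if path not in seen_paths:         # new behaviour -> keep this input
            seen_paths.add(path)
            population.append(inp)
    return population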
def disc_analysis(inp):
    arr = xml_parser.xml_parser('Discriminant_Analysis_Params.xml', inp)
    n = len(arr)
    if n != 13:
        return False
    try:
        # if(arr[5]=='eigen' and arr[1] > 3):
        #     arr[1] = 3
        # if(arr[5]=='lsqr' and arr[1] > 3):
        #     arr[1] = 3
        # X, y = make_classification(n_samples=arr[0], n_features=arr[1],
        #                            n_informative=arr[2], n_classes=arr[3])
        if arr[12] == 0:
            X, y = dataset_fixed_cov(arr[0], arr[1])
        elif arr[12] == 1:
            X, y = dataset_cov(arr[0], arr[1])
            # print("here!!")
        else:
            if arr[5] == 'svd':
                X, y = make_classification(n_samples=arr[0], n_features=arr[1],
                                           n_informative=arr[2], n_classes=arr[3])
            else:
                return False
        # print("done1")
    except ValueError:
        # print("error1")
        return False
    # print("here")
    # value for parameter 6 (shrinkage)
    if arr[6] == "float":
        arr[6] = random.uniform(0, 1)
    elif arr[6] == "auto":
        arr[6] = "auto"
    else:
        arr[6] = None
    # value for parameter 7 (priors)
    # if(arr[7]!=None):
    #     val_7 = np.random.dirichlet(np.ones(arr[3]), size=1.0)
    # else:
    arr[7] = None
    # value for parameter 8 (n_components)
    if arr[8] != 'None':
        arr[8] = np.random.randint(1, arr[3])
    else:
        arr[8] = None
    # note from the sklearn docs: shrinkage is not supported with the 'svd' solver
    if arr[5] == 'svd' and arr[6] != None:
        return False
    print(arr)
    if arr[4]:
        try:
            # Linear Discriminant Analysis
            lda = LinearDiscriminantAnalysis(solver=arr[5], shrinkage=arr[6],
                                             priors=arr[7], n_components=arr[8],
                                             store_covariance=arr[9], tol=arr[10])
            # print("done2")
        except ValueError:
            # print("error2")
            return False
        try:
            y_pred = lda.fit(X, y)
            # print("done3")
        except TypeError:
            # print("error3")
            return False
    else:
        try:
            # Quadratic Discriminant Analysis
            qda = QuadraticDiscriminantAnalysis(priors=arr[7], reg_param=arr[11],
                                                store_covariance=arr[9], tol=arr[10])
            # print("here21")
        except ValueError:
            return False
        try:
            y_pred = qda.fit(X, y)
            # print("here22")
        except TypeError:
            return False
    return True