def dataMaker(split=.9):
    corpus, token_frequency = xml.xml_parser()

    corpus = five_occurence_pruner(corpus, token_frequency)

    print "pre prunning zeros"
    print len(corpus)
    #prunning
    new_messages = []
    for message in corpus:
        if len(message.tokens) != 0:
            new_messages.append(message)
    corpus = new_messages

    print "post prunning"
    print len(corpus)

    vocabulary = get_vocabulary(corpus)
    corpus = final_instance_assembler(corpus, vocabulary)

    corpus = context_gather(corpus)

    print "Packaging corpus"
    corpus = Corpus(corpus, split)

    return corpus
Example #2
def xml_to_tsv(xml_path, tsv_path):
	"""
	Wrapper function for xml_parser and xml_stream_handler.
	"""
	stream_handler = xml_handler(tsv_path)
	parser = xml_parser(stream_handler)
	parser.parse_xml_file(xml_path)
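A minimal usage sketch for the wrapper above, assuming xml_handler and xml_parser are importable from the surrounding module (the file names are placeholders, not from the source):

xml_to_tsv("input.xml", "output.tsv")  # streams parsed XML records into a TSV file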
Example #3
def generate_batches(inp):
    arr = xml_parser.xml_parser('gen_batches_Params.xml', inp)
    n = len(arr)
    if n != 3:
        return False
    try:
        # gen_batches(n, batch_size): materialize the generator so argument
        # validation actually runs
        list(gen_batches(arr[0], arr[1]))
    except ValueError:
        return False
    return True
Example #4
def TreeRegress(inp):
    arr = xml_parser.xml_parser('TreeRegressor_Params.xml', inp)
    n = len(arr)
    if n != 15:
        return False
    print(arr)
    n_model = arr[0]
    rng = np.random.RandomState(1)
    # value for max_depth
    if arr[5] is not None:
        arr[5] = 2 * np.random.randint(1, arr[1])
    # min_samples_leaf: collapse whole-number floats to int
    if arr[7] == float(int(arr[7])):
        arr[7] = int(arr[7])
    # value for max_features
    if arr[9] == 'int':
        arr[9] = np.random.randint(1, arr[1])
    elif arr[9] == 'float':
        arr[9] = random.uniform(0, 1) * arr[1]
    elif arr[9] == 'None':
        arr[9] = None
    if arr[10] == 'None':
        arr[10] = None

    try:
        train_X, train_y = make_regression(n_samples=arr[0],
                                           n_features=arr[1],
                                           n_informative=arr[2])
        print("done1")
    except ValueError:
        print("error1")
        return False
    try:
        random_forest = RandomForestRegressor(n_estimators=arr[3],
                                              criterion=arr[4],
                                              max_depth=arr[5],
                                              min_samples_split=arr[6],
                                              min_samples_leaf=arr[7],
                                              min_weight_fraction_leaf=arr[8],
                                              max_features=arr[9],
                                              max_leaf_nodes=arr[10],
                                              min_impurity_decrease=arr[11],
                                              bootstrap=arr[12],
                                              oob_score=arr[13],
                                              warm_start=arr[14])
        print("done2")
    except ValueError:
        print("error2")
        return False
    try:
        random_forest.fit(train_X, train_y)
    except ValueError:
        print("error3")
        return False
    return True
Example #5
def minibatch_kmeans(inp):
    arr = xml_parser.xml_parser('minibatch_kmeans_Params.xml', inp)
    n = len(arr)
    if n != 15:
        return False
    try:
        # pick the dataset generator encoded in arr[2]
        if arr[2] == 0:
            X, y = make_classification(n_samples=arr[0],
                                       n_features=arr[1],
                                       n_informative=2,
                                       n_classes=arr[3])
        elif arr[2] == 1:
            X, y = dataset_fixed_cov(arr[0], arr[1])
        elif arr[2] == 2:
            X, y = dataset_cov(arr[0], arr[1])
        else:
            # no generator for other codes (make_circles was disabled);
            # bail out so X is never used unbound below
            return False
    except ValueError:
        return False
    print(arr)
    X = StandardScaler().fit_transform(X)

    if arr[9] == 'None':
        arr[9] = None
    else:
        arr[9] = 2

    if arr[12] == 0:
        arr[12] = None
    elif arr[12] < arr[3]:
        arr[12] = arr[3]

    try:
        MBKM = cluster.MiniBatchKMeans(n_clusters=arr[3],
                                       init=arr[4],
                                       max_iter=arr[5],
                                       batch_size=arr[6],
                                       verbose=arr[7],
                                       compute_labels=arr[8],
                                       random_state=arr[9],
                                       tol=arr[10],
                                       max_no_improvement=arr[11],
                                       init_size=arr[12],
                                       n_init=arr[13],
                                       reassignment_ratio=arr[14])
        MBKM.fit(X)
        print("Done!")
    except ValueError:
        return False
    return True
Example #6
def generate_data_classification(inp):
    arr = xml_parser.xml_parser('make_classification_Params.xml', inp)
    n = len(arr)
    if n != 10:
        return False

    if arr[7] == 'False':
        arr[7] = False

    if arr[9] == 'None':
        arr[9] = None
    else:
        arr[9] = 2

    # print(arr)
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(10)

    try:
        X, y = make_multilabel_classification(n_samples=arr[0],
                                              n_features=arr[1],
                                              n_classes=arr[2],
                                              n_labels=arr[3],
                                              length=arr[4],
                                              allow_unlabeled=arr[5],
                                              sparse=arr[6],
                                              return_indicator=arr[7],
                                              return_distributions=arr[8],
                                              random_state=arr[9])
    except (ValueError, ZeroDivisionError):
        return False
    finally:
        # always cancel the pending alarm; a return here would swallow the
        # except-branch results, so the finally block only cleans up
        signal.alarm(0)
    return True
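generate_data_classification registers a module-level timeout_handler for SIGALRM that this excerpt does not show. A minimal sketch of what such a handler presumably looks like (only the name comes from the call above; the body is an assumption):

def timeout_handler(signum, frame):
    # abort the data generation once the 10-second alarm fires
    raise TimeoutError("make_multilabel_classification timed out")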
Example #7
def logistic_regression(inp):
    arr = xml_parser.xml_parser('logistic_regression_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    if arr[5] != 'newton-cg':
        return False

    try:
        X, y = make_classification(n_samples=arr[0],
                                   n_features=arr[1],
                                   n_informative=arr[2],
                                   n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    # flag the multinomial + l2 + zero-tol corner case (t is otherwise unused)
    t = 1 if arr[8] == 0.0 and arr[13] == 'multinomial' and arr[6] == 'l2' else 2

    try:
        clf = LogisticRegression(penalty=arr[6],
                                 dual=arr[7],
                                 tol=arr[8],
                                 C=arr[9],
                                 fit_intercept=arr[10],
                                 intercept_scaling=arr[11],
                                 solver=arr[5],
                                 n_jobs=arr[4],
                                 max_iter=arr[12],
                                 multi_class=arr[13])
        clf.fit(X, y)
        # print("here1")
    except ValueError:
        # print("here2")
        return False
    except IOError:
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
Example #8
def GaussianProcess(inp):
    arr = xml_parser.xml_parser('Gaussian_Proc_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    try:
        X, y = make_classification(n_samples=arr[0],
                                   n_features=arr[1],
                                   n_informative=arr[2],
                                   n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    print(arr)
    # kernel: constant scale (arr[4]) times an RBF with arr[5] length scales
    kernel = arr[4] * RBF([1.0 for i in range(arr[5])])

    if arr[6] == 'None':
        arr[6] = None

    if arr[11] == 'None':
        arr[11] = None
    else:
        arr[11] = 2
    # X = StandardScaler().fit_transform(X)
    try:
        clf = GaussianProcessClassifier(kernel=kernel,
                                        optimizer=arr[6],
                                        n_restarts_optimizer=arr[7],
                                        max_iter_predict=arr[8],
                                        warm_start=arr[9],
                                        copy_X_train=arr[10],
                                        random_state=arr[11],
                                        multi_class=arr[12],
                                        n_jobs=arr[13])
        clf.fit(X, y)
        # print("here1")
    except ValueError:
        # print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
Example #9
def logistic_regression(inp):
    arr = xml_parser.xml_parser('logistic_regression_Params.xml', inp)
    n = len(arr)
    if n != 14:
        return False
    try:
        X, y = make_classification(n_samples=arr[0],
                                   n_features=arr[1],
                                   n_informative=arr[2],
                                   n_classes=arr[3])
    except ValueError:
        return False
    print(arr)
    # print("here")
    # try:
    #     with parallel_backend(backend):
    #         print("here0")
    try:
        clf = LogisticRegression(penalty=arr[6],
                                 dual=arr[7],
                                 tol=arr[8],
                                 C=arr[9],
                                 fit_intercept=arr[10],
                                 intercept_scaling=arr[11],
                                 solver=arr[5],
                                 n_jobs=arr[4],
                                 max_iter=arr[12],
                                 multi_class=arr[13])
        clf.fit(X, y)


#        print("here1")
    except ValueError:
        #	pass
        #        print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
Example #10
    def open_cad(
        self,
        path,
        isthread=False
    ):  # change to False to use threads; the concurrency still needs checking!
        if not isthread:
            thread = Thread(target=self.open_cad, args=(
                path,
                True,
            ))
            thread.start()
            return

        if not os.path.isfile(path):
            print path, "does not exist!"
            return

        self.gui.clean_status = False
        self.data.printStatus("DXF import - parse file ...")

        self.data.project.layout.path = path

        # choose importer based on extension
        ext = path.split(".")[-1]
        if ext == "xml":
            parser = xml_parser.xml_parser()
        elif ext == "dxf":
            parser = dxf_parser.dxf_parser()
        else:
            # unknown extension: bail out instead of using an unbound parser
            self.data.printStatus("DXF import - unsupported file type: " + ext)
            return

        #start parser
        parser.parse(path)
        self.data.project.layout.viewports = parser.viewports
        self.data.active_viewport = parser.viewports[list(
            parser.viewports.keys())[0]]
        self.colorizeLayers()

        #execute post stuff in gtk main thread
        gobject.idle_add(self.gui.updateViewportList)

        self.data.printStatus("DXF import - done")
        self.gui.clean_status = True
Example #11
def main():
    filepath = sys.argv[1]  # job..out file
    if not os.path.isfile(filepath):
        print("File path {} does not exist. Exiting...".format(filepath))
        sys.exit()
    f = open(sys.argv[2], "w")  # output file
    fp = open(filepath, "r")
    cnt = 0
    is_considering = False
    path = ""
    time = ""
    input_val = ""
    for line in fp:
        if "The path" in line and cnt == 0:
            line = line.strip()
            is_considering = True
            path = line.split(": ")[1]
            cnt += 1
        elif is_considering and cnt == 1:
            cnt += 1
        elif is_considering and cnt == 2:
            line = line.strip()
            time = line
            cnt += 1
        elif is_considering and cnt == 3:
            inp = line.strip()
            cnt = 0
            is_considering = False
            arr = xml_parser.xml_parser(sys.argv[3], inp)  # input XML file
            if len(arr) != int(sys.argv[4]):  # expected number of parameters in the XML
                continue
            else:
                f.write(str(arr) + "," + time + "," + path + "\n")
                path = ""
                time = ""
                input_val = ""
    f.close()
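main expects four positional arguments; a hypothetical invocation (the script and file names are illustrative, not from the source):

# argv[1]: fuzzer job output, argv[2]: destination file,
# argv[3]: parameter XML, argv[4]: expected parameter count
# python extract_paths.py job.out paths.csv TreeRegressor_Params.xml 15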
Example #12
 def __init__(self, parent=None):
     """
     Constructor
     """
     hw_info_in_xml()
     QMainWindow.__init__(self, parent)
     self.setupUi(self)
     hw_info=xml_parser("./resources/hw.xml")
     stack=[hw_info.first_node_key]
     hw_info=hw_info.hw_info
     stack1 = [self.treeWidget]
     while len(stack) != 0:
         t = stack.pop()
         try:
             hw_info[t]["description"]
         except KeyError:
             # node has no description: skip to the next one on the stack
             try:
                 t = stack.pop()
             except IndexError:
                 pass
         item = QtGui.QTreeWidgetItem(stack1.pop())
         item.setData(1, 1, list(t))
         #print list(t)
         #for i in item.data(1, 1).toList() :
             #print i.toString()
         try:
             item.setText(0, hw_info[t]["description"])
         except KeyError:
             pass
         tmp = hw_info[t]['child_nodes']
         tmp.reverse()
         for i in tmp:
             stack.append(i)
             stack1.append(item)
     # self.sys_info_view.setUrl(QUrl("file:///home/boss/py/resources/system-info.html"))
     self.treeWidget.show()
Example #13
def DecisionTree(inp):
    arr = xml_parser.xml_parser('Decision_Tree_Classifier_Params.xml', inp)
    n = len(arr)
    if n != 18:
        return False
    try:
        X, y = make_classification(n_samples=arr[0],
                                   n_features=arr[1],
                                   n_informative=arr[2],
                                   n_classes=arr[3])
    except ValueError:
        # print("here")
        return False
    print(arr)

    if arr[6] == 'None':
        arr[6] = None
    else:
        arr[6] = random.randint(1, 100)

    if arr[7] == int(arr[7]):
        arr[7] = int(arr[7])
    else:
        arr[7] = arr[7] / 100.0

    if arr[8] == int(arr[8]):
        arr[8] = int(arr[8])
    else:
        arr[8] = arr[8] / 50.0

    if arr[10] == 'val':
        if arr[11] == int(min(arr[11], arr[1])):
            arr[11] = int(arr[11])
        else:
            arr[11] = arr[11] / 10.0
    elif arr[10] == 'None':
        arr[11] = None
    else:
        arr[11] = arr[10]

    if arr[12] == 'None':
        arr[12] = None
    else:
        arr[12] = random.randint(1, 10)

    if arr[13] == 'None':
        arr[13] = None
    else:
        arr[13] = random.randint(1, 100)

    if arr[16] == 'None':
        arr[16] = None
    elif arr[16] == 'weighted':
        weight_lst = {}
        for class_num in range(arr[3]):
            weight_lst[class_num] = random.randint(1, 5)
        arr[16] = weight_lst

    # X = StandardScaler().fit_transform(X)
    try:
        clf = DecisionTreeClassifier(criterion=arr[4],
                                     splitter=arr[5],
                                     max_depth=arr[6],
                                     min_samples_split=arr[7],
                                     min_samples_leaf=arr[8],
                                     min_weight_fraction_leaf=arr[9],
                                     max_features=arr[11],
                                     random_state=arr[12],
                                     max_leaf_nodes=arr[13],
                                     min_impurity_decrease=arr[14],
                                     class_weight=arr[16],
                                     presort=arr[17])
        clf.fit(X, y)
        print("here1")
    except ValueError:
        print("here2")
        return False
    # except KeyError:
    #     # print("here3")
    #     return False
    return True
Example #14
"""
This is the main file. Lines 29 to 50 only need to be run once: they compute the pCTR for each campaign and the data statistics.
Uncomment those lines for the first run and comment them out again once it has finished.
"""
from Data_statistics import data_statistics_test
from Data_statistics import data_statistics_train
from CTR_estimation import ctr_estimate_basedcampaign
from CTR_estimation import ctr_Global_estimate
from Auctions import auction_based_adviser
from Auctions import auction_adviser_less
from Generate_statistics import generate_statistics_based_adviser
from Generate_statistics import generate_statistics_adviser_Less
import pandas as pd
from xml_parser import xml_parser

DSP_list, mode = xml_parser(
    '../Conf_file/Conf_file_mode2.xml')  # read the configuration file

#############################################################
############################################################
###########################################################

#campaings=['2997','2821','2261','2259','3358','3386','3427','3476','1458']
#statistics_train=pd.DataFrame()
#statistics_test=pd.DataFrame()
#data_test_all=pd.read_csv("../all/test.log.txt", header=0, sep='\t', index_col=False,engine='python')# load all test set merged in one file
#global_pCTR=pd.DataFrame()
#global_pCTR['payprice']=data_test_all['payprice']
#for campaign in campaings:
#    data_train=pd.read_csv("../"+campaign+"/train.log.txt", header=0, sep='\t', index_col=False,engine='python')# load train data
#    data_test=pd.read_csv("../"+campaign+"/test.log.txt", header=0, sep='\t', index_col=False,engine='python')# load test data
#    pCTR=ctr_estimate_basedcampaign(campaign,data_train,data_test) # estimate the CTR for the campaign
Example #15
 def on_treeWidget_itemClicked(self, item, column):
     """
     Slot documentation goes here.
     """
     t = [str(i.toString()) for i in item.data(1, 1).toList()]
     hw_info=xml_parser("./resources/hw.xml").hw_info
     node=hw_info[tuple(t)]
     #print node
     #print dir(self.tableWidget)
     #self.tableWidget.clear()
     # clear the table, removing rows from the bottom up
     row_count = self.tableWidget.rowCount()
     while row_count != -1:
         self.tableWidget.removeRow(row_count)
         row_count = row_count - 1
     row = 0
     for i, j in node.iteritems():
         if i == "configuration":
             self.tableWidget.insertRow(row)
             item = QtGui.QTableWidgetItem(i + ' :')
             self.tableWidget.setItem(row, 0, item)
             row += 1
             for id, value in j.iteritems():
                 self.tableWidget.insertRow(row)
                 item = QtGui.QTableWidgetItem(id)
                 item1 = QtGui.QTableWidgetItem(value)
                 self.tableWidget.setItem(row, 0, item)
                 self.tableWidget.setItem(row, 1, item1)
                 row += 1
         elif i == "capabilities":
             self.tableWidget.insertRow(row)
             item = QtGui.QTableWidgetItem(i + ' :')
             self.tableWidget.setItem(row, 0, item)
             row += 1
             for k in j:
                 item1 = QtGui.QTableWidgetItem(k)
                 self.tableWidget.insertRow(row)
                 self.tableWidget.setItem(row, 1, item1)
                 row += 1
         elif i == "resources":
             self.tableWidget.insertRow(row)
             item = QtGui.QTableWidgetItem(i + ' :')
             self.tableWidget.setItem(row, 0, item)
             row += 1
             for id, value in j.iteritems():
                 self.tableWidget.insertRow(row)
                 item = QtGui.QTableWidgetItem(id)
                 item1 = QtGui.QTableWidgetItem(value)
                 self.tableWidget.setItem(row, 0, item)
                 self.tableWidget.setItem(row, 1, item1)
                 row += 1
         elif i == "child_nodes":
             pass
         else:
             self.tableWidget.insertRow(row)
             item = QtGui.QTableWidgetItem(i)
             item1 = QtGui.QTableWidgetItem(j)
             self.tableWidget.setItem(row, 0, item)
             self.tableWidget.setItem(row, 1, item1)
             row += 1
Example #16
def run_driver(seed_inputs):

    inp_program_instr = FunctionCoverageRunner(input_program)
    mutation_fuzzer = MutationCoverageFuzzer(seed=seed_inputs,
                                             min_mutations=1,
                                             max_mutations=5)
    for i in range(NUM_ITER):
        mutation_fuzzer.runs(inp_program_instr, trials=TRIASL_EACH_ITER)
        for key in mutation_fuzzer.coverages_seen.keys():
            print("The path key is: " + str(key))
            max_int = mutation_fuzzer.coverages_seen[key].index(
                max(mutation_fuzzer.coverages_seen[key]))
            print("step: " + str(i + 1))
            print(max(mutation_fuzzer.coverages_seen[key]))
            print(mutation_fuzzer.population[key][max_int])

    # print("Best input and coverage overall!")
    # max_int = mutation_fuzzer.coverages_seen.index(max(mutation_fuzzer.coverages_seen))
    # print(max(mutation_fuzzer.coverages_seen))
    # print(mutation_fuzzer.population[max_int])
    # store the results!
    # based on the length of inputs!
    if INPUT_SIZE:
        f1 = open(
            "complexity_driver_" + str(input_program).split(" ")[1] + ".csv",
            "w")
        for key in mutation_fuzzer.coverages_seen.keys():
            included_lines = []
            index = 0
            for inp_pop in mutation_fuzzer.population[key]:
                if len(inp_pop) not in included_lines:
                    avail_ind = [
                        i
                        for i in range(0, len(mutation_fuzzer.population[key]))
                        if len(mutation_fuzzer.population[key][i]) == len(
                            inp_pop)
                    ]
                    c1 = np.max([
                        mutation_fuzzer.coverages_seen[key][i]
                        for i in avail_ind
                    ])
                    f1.write(
                        str(len(inp_pop)) + "," + str(c1) + "," + str(key) +
                        "\n")
                    included_lines.append(len(inp_pop))
                index += 1
            if key in mutation_fuzzer.model_fit.keys():
                print("The path key is: " + str(key))
                print("The model is: " + str(mutation_fuzzer.model_fit[key]))
                print("The cluster is: " +
                      str(mutation_fuzzer.path_cluster[key]))
            else:
                print("The path key is: " + str(key))
                print("no model for the above key")
        for clust in mutation_fuzzer.cluster_paths.keys():
            print(mutation_fuzzer.cluster_paths[clust])
        f1.close()
    # based on some parts of inputs
    else:
        f1 = open(
            "complexity_driver_" + str(input_program).split(" ")[1] + ".csv",
            "w")
        for key in mutation_fuzzer.coverages_seen.keys():
            included_lines = {}
            index = 0
            for inp_pop in mutation_fuzzer.population[key]:
                arr = xml_parser.xml_parser(input_program_tree, inp_pop)
                # choose the size parameter
                if (len(arr) == NUM_PARAMETERS):
                    len_inp_pop = 1
                    for index in SIZE_INDEX:
                        len_inp_pop *= arr[index]
                else:
                    index += 1
                    continue

                if len_inp_pop not in included_lines:
                    c1 = mutation_fuzzer.coverages_seen[key][index]
                    f1.write(
                        str(arr) + "," + str(len_inp_pop) + "," + str(c1) +
                        "," + str(key) + "\n")
                    included_lines[len_inp_pop] = c1
                else:
                    c1 = mutation_fuzzer.coverages_seen[key][index]
                    max_val = included_lines[len_inp_pop]
                    if max_val < c1:
                        f1.write(
                            str(arr) + "," + str(len_inp_pop) + "," + str(c1) +
                            "," + str(key) + "\n")
                        included_lines[len_inp_pop] = c1
                index += 1
        f1.close()
Example #17
    def run(self, runner):
        """Run function(inp) while tracking coverage.
           If we reach new coverage,
           add inp to population and its coverage to population_coverage
        """
        try:
            result, outcome = super(MutationCoverageFuzzer, self).run(runner)
        except TimeoutError as error:
            print("Caught an error!")
            return ""
        key_path = runner.coverage()[0]
        if Actual_Time:
            val_cost = self.time_cost
        else:
            val_cost = runner.coverage()[1]
        if key_path in self.removed_path:
            return ""
        self.new_coverage = val_cost
        self.num_inp += 1
        # Do Fitting and Clustering
        if INPUT_SIZE and DO_CLUSTERING and self.num_inp % STEPS_TO_DO_CLUSTERING == 0:
            for key_path in self.coverages_seen.keys():
                if (key_path in self.updated_since_clustered
                        and not self.updated_since_clustered[key_path]):
                    continue
                X = [len(x) for x in self.population[key_path]]
                y = [x for x in self.coverages_seen[key_path]]
                if len(set(X)) >= DEGREE_TO_FIT + 1:
                    vals, stats_res = P.polyfit(X, y, DEGREE_TO_FIT, full=True)
                    self.model_fit[key_path] = (vals, stats_res[0])

            path_orders = []

            for key_path in self.model_fit.keys():
                if not self.updated_since_clustered[key_path]:
                    path_orders.append(key_path)
                else:
                    self.updated_since_clustered[key_path] = False
                    path_orders.append(key_path)
                    self.eval_functions[key_path] = [
                        self.model_fit[key_path][0][1] * i_size +
                        self.model_fit[key_path][0][0]
                        for i_size in range(1, MAX_SIZE)
                    ]
            eval_functions_array = np.array([
                self.eval_functions[key] for key in self.eval_functions.keys()
            ])
            if len(self.eval_functions.keys()) >= NUM_CLUSTERS:
                kmeans = KMeans(n_clusters=NUM_CLUSTERS,
                                random_state=1).fit(eval_functions_array)
                self.cluster_paths = {}
                for i, clust in enumerate(kmeans.labels_):
                    self.path_cluster[path_orders[i]] = clust
                    if clust in self.cluster_paths.keys():
                        self.cluster_paths[clust].append(path_orders[i])
                    else:
                        self.cluster_paths[clust] = [path_orders[i]]

        if DO_CLUSTERING and self.num_inp % STEPS_TO_KILL == 0:
            max_val = -1
            max_path = -1
            max_clust = -1
            for cluster in self.cluster_paths.keys():
                for x in self.cluster_paths[cluster]:
                    if self.worst_costs[x] > max_val:
                        max_val = self.worst_costs[x]
                        max_path = x
                        max_clust = cluster
            for key_path in self.coverages_seen.keys():
                if key_path not in self.model_fit.keys():
                    if self.worst_costs[key_path] < max_val:
                        self.population.pop(key_path, None)
                        self.coverages_seen.pop(key_path, None)
                        self.worst_costs.pop(key_path, None)
                        self.last_update.pop(key_path, None)
                        self.removed_path.append(key_path)

        # size limitation based on the number of arguments
        inp_num_args = self.inp.split(" ")
        is_interesting = True
        if key_path not in self.coverages_seen.keys():
            if len(self.inp) <= MAX_SIZE:
                self.coverages_seen[key_path] = [val_cost]
                self.population[key_path] = [self.inp]
                self.worst_costs[key_path] = val_cost
                self.last_update[key_path] = self.num_inp
                self.updated_since_clustered[key_path] = True
            else:
                is_interesting = False
        # this is based on the length of input string
        elif outcome == Runner.PASS and INPUT_SIZE and self.new_coverage > np.percentile(
                self.coverages_seen[key_path], PERCENTAGE_TO_KEEP) and len(
                    self.inp) <= MAX_SIZE and len(self.inp) <= np.median(
                        [len(p) for p in self.population[key_path]]
                    ) + CONSTANT_FACTOR * np.sqrt(
                        np.median([len(p) for p in self.population[key_path]])):
            self.population[key_path].append(self.inp)
            self.coverages_seen[key_path].append(self.new_coverage)
        # this is based on the arguments in the input
        elif outcome == Runner.PASS and not INPUT_SIZE and self.new_coverage > np.percentile(
                self.coverages_seen[key_path],
                PERCENTAGE_TO_KEEP) and len(inp_num_args) <= NUM_PARAMETERS:
            arr = xml_parser.xml_parser(input_program_tree, self.inp)
            # choose the size parame
            if (len(arr) == NUM_PARAMETERS):
                len_inp_pop = 1
                for index in SIZE_INDEX:
                    len_inp_pop *= arr[index]
            else:
                len_inp_pop = 0
                is_interesting = False
            if is_interesting and key_path not in self.len_inputs.keys():
                self.population[key_path].append(self.inp)
                self.coverages_seen[key_path].append(self.new_coverage)
                self.len_inputs[key_path] = len_inp_pop
            elif is_interesting and len_inp_pop <= self.len_inputs[
                    key_path] + CONSTANT_FACTOR * np.sqrt(
                        self.len_inputs[key_path]):
                self.population[key_path].append(self.inp)
                self.coverages_seen[key_path].append(self.new_coverage)
                if len_inp_pop > self.len_inputs[key_path]:
                    self.len_inputs[key_path] = len_inp_pop
            else:
                is_interesting = False
        # not interesting
        else:
            is_interesting = False

        if is_interesting and self.worst_costs[key_path] < val_cost:
            self.worst_costs[key_path] = val_cost
        if is_interesting:
            self.last_update[key_path] = self.num_inp
            self.updated_since_clustered[key_path] = True
        elif len(self.inp) > MAX_SIZE:
            return ""
        elif key_path not in self.path_cluster.keys(
        ) and self.num_inp - self.last_update[key_path] > STEPS_TO_KILL and len(
                self.population
        ) > MIN_NUM_PATH and self.worst_costs[key_path] <= np.percentile(
                list(self.worst_costs.values()), 50):
            min_cost = self.worst_costs[key_path]
            min_path = key_path
            for path_other in self.worst_costs.keys():
                if self.worst_costs[path_other] < min_cost:
                    min_cost = self.worst_costs[path_other]
                    min_path = path_other
            if key_path == min_path:
                self.population.pop(key_path, None)
                self.coverages_seen.pop(key_path, None)
                self.worst_costs.pop(key_path, None)
                self.last_update.pop(key_path, None)
                self.removed_path.append(key_path)
                return ""
            else:
                self.population.pop(min_path, None)
                self.coverages_seen.pop(min_path, None)
                self.worst_costs.pop(min_path, None)
                self.last_update.pop(min_path, None)
                self.removed_path.append(min_path)

        elif key_path in self.path_cluster.keys(
        ) and ALLOWED_REMOVE_CLUSTER_PATH and self.num_inp - self.last_update[
                key_path] > STEPS_TO_KILL and len(self.cluster_paths[
                    self.path_cluster[key_path]]) > MIN_NUM_PATH_PER_CLUST:
            clust = self.path_cluster[key_path]
            for x in self.cluster_paths[clust]:
                if self.worst_costs[x] > self.worst_costs[key_path]:
                    self.cluster_paths[clust].remove(key_path)
                    self.path_cluster.pop(key_path, None)
                    self.model_fit.pop(key_path, None)
                    self.updated_since_clustered.pop(key_path, None)
                    self.eval_functions.pop(key_path, None)
                    self.population.pop(key_path, None)
                    self.coverages_seen.pop(key_path, None)
                    self.worst_costs.pop(key_path, None)
                    self.last_update.pop(key_path, None)
                    self.removed_path.append(key_path)
                    break

        if not is_interesting:
            result = ""
        # key_path may have been removed by the cluster branch above
        if key_path in self.population and len(
                self.population[key_path]) > MAX_POP_SIZE:
            if INPUT_SIZE:
                # keep the best eighth by coverage per input length
                indicies = sorted(
                    range(len(self.coverages_seen[key_path])),
                    key=lambda i: self.coverages_seen[key_path][i] /
                    (len(self.population[key_path][i])))[-MAX_POP_SIZE // 8:]
                self.coverages_seen_new = []
                self.population_new = []
                for index in indicies:
                    self.population_new.append(
                        self.population[key_path][index])
                    self.coverages_seen_new.append(
                        self.coverages_seen[key_path][index])
                self.population[key_path] = self.population_new
                self.coverages_seen[key_path] = self.coverages_seen_new
            else:
                indicies = []
                for k in range(MAX_POP_SIZE // 20):
                    indicies.append(random.randint(0, 7 * MAX_POP_SIZE // 8))
                self.coverages_seen_new = []
                self.population_new = []
                for index in indicies:
                    self.population_new.append(
                        self.population[key_path][index])
                    self.coverages_seen_new.append(
                        self.coverages_seen[key_path][index])
                self.population_new = self.population_new + self.population[
                    key_path][-MAX_POP_SIZE // 8:]
                self.coverages_seen_new = self.coverages_seen_new + self.coverages_seen[
                    key_path][-MAX_POP_SIZE // 8:]
                self.population[key_path] = self.population_new
                self.coverages_seen[key_path] = self.coverages_seen_new

        return result
Example #18
def disc_analysis(inp):
    arr = xml_parser.xml_parser('Discriminant_Analysis_Params.xml', inp)
    n = len(arr)
    if n != 13:
        return False
    try:
        # if(arr[5]=='eigen' and arr[1] > 3):
        #     arr[1] = 3
        # if(arr[5]=='lsqr' and arr[1] > 3):
        #     arr[1] = 3
        # X, y = make_classification(n_samples=arr[0], n_features=arr[1],
        #             n_informative=arr[2], n_classes=arr[3])
        if arr[12] == 0:
            X, y = dataset_fixed_cov(arr[0], arr[1])
        elif arr[12] == 1:
            X, y = dataset_cov(arr[0], arr[1])
            # print("here!!")
        else:
            if arr[5] == 'svd':
                X, y = make_classification(n_samples=arr[0],
                                           n_features=arr[1],
                                           n_informative=arr[2],
                                           n_classes=arr[3])
            else:
                return False
        # print("done1")
    except ValueError:
        # print("error1")
        return False
    # print("here")

    # value for parameter_6 (shrinkage)
    if arr[6] == "float":
        arr[6] = random.uniform(0, 1)
    elif arr[6] == "auto":
        arr[6] = "auto"
    else:
        arr[6] = None
    # value for parameter_7
    # if(arr[7]!=None):
    #     val_7 = np.random.dirichlet(np.ones(arr[3]),size=1.0)
    # else:
    arr[7] = None
    # value for parameter_8
    if arr[8] != 'None':
        arr[8] = np.random.randint(1, arr[3])
    else:
        arr[8] = None

    # per the sklearn docs, shrinkage is not supported by the 'svd' solver
    if arr[5] == 'svd' and arr[6] is not None:
        return False

    print(arr)
    # arr[4] selects Linear vs. Quadratic Discriminant Analysis
    if arr[4]:
        try:
            # Linear Discriminant Analysis
            lda = LinearDiscriminantAnalysis(solver=arr[5],
                                             shrinkage=arr[6],
                                             priors=arr[7],
                                             n_components=arr[8],
                                             store_covariance=arr[9],
                                             tol=arr[10])
            # print("done2")
        except ValueError:
            # print("error2")
            return False
        try:
            y_pred = lda.fit(X, y)
            # print("done3")
        except TypeError:
            # print("error3")
            return False
    else:
        try:
            # Quadratic Discriminant Analysis
            qda = QuadraticDiscriminantAnalysis(priors=arr[7],
                                                reg_param=arr[11],
                                                store_covariance=arr[9],
                                                tol=arr[10])
            # print("here21")
        except ValueError:
            return False
        try:
            y_pred = qda.fit(X, y)
            # print("here22")
        except TypeError:
            return False
    return True