def local_first(_list=[], n=10, m=3):
    import utility.Util as u
    import algorithms.SPT as SPT

    # Calculate the initial SPT schedule
    matrix = SPT.SPT(_list, n, m)

    # Reverse each row of the matrix
    for i in range(m):
        matrix[i].reverse()

    # Length of the shortest matrix row (the last row is always the shortest for SPT)
    _length = len(matrix[-1])

    # Operate on ordering levels (ukr: rivni vporiadkovanosti):
    # swap each min-max pair on the current ordering level while there are still unswapped elements
    for i in range(_length):
        start_iterator = 0
        end_iterator = m - 1
        while start_iterator < end_iterator:
            if u.radial_swap_calculate(matrix, start_iterator, i, end_iterator, i)[1] < 0:
                u.matrix_swap(matrix, start_iterator, i, end_iterator, i)
            start_iterator += 1
            end_iterator -= 1

    # Reverse matrix rows back to their previous order
    for i in range(m):
        matrix[i].reverse()
    return matrix
def load_data(self, fname):
    obj = Util()
    lnum = obj.file_len(fname)
    # Open in binary mode so pickle can read the stream; the context manager
    # closes the file, so no explicit close() is needed. (cPickle was merged
    # into pickle in Python 3.)
    with open(fname, 'rb') as f:
        for i in range(0, lnum + 1):
            data = pickle.load(f)
            print(data)
def local_second(_list=[], n=10, m=3):
    import utility.Util as u
    import algorithms.SPT as SPT

    # Initial SPT schedule
    matrix = SPT.SPT(_list, n, m)
    _length = len(matrix[-1])

    # n times: try to swap values and recalculate TF, accepting the swaps with the best possible TF value
    for _ in range(n):
        for i in range(_length):
            u.try_all_in_radius(matrix, i, m)
    return matrix
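# A minimal usage sketch for the two heuristics above. The job durations are
# illustrative, and it is only assumed (based on the signatures) that SPT.SPT
# expects a flat list of durations plus the job count n and machine count m.
if __name__ == "__main__":
    jobs = [4, 7, 2, 9, 5, 1, 8, 3, 6, 10]
    schedule_a = local_first(jobs, n=len(jobs), m=3)
    schedule_b = local_second(jobs, n=len(jobs), m=3)
    print(schedule_a)
    print(schedule_b)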
def get_similarity_relevance_dict(self, trace_links):
    """
    Returns a dict with the shape:
    req_dict["req_name"] = [(sim_to_code_1: float, relevant: bool), (sim_to_code_2, relevant), ...]
    This is used for the average precision calculation
    """
    req_dict = {}
    # Use a copy to track false negatives and avoid duplicate trace links
    sol_matrix_copy = Util.deep_copy(self._solution_trace_matrix)
    for trace_link in trace_links:
        req_name = trace_link.req_key
        code_name = trace_link.code_key
        sim_rel_tuple_to_add = (trace_link.similarity, False)
        if sol_matrix_copy.contains_req_code_pair(req_name, code_name):
            sim_rel_tuple_to_add = (trace_link.similarity, True)
            sol_matrix_copy.remove_trace_pair(req_name, code_name)
        if req_name in req_dict:
            req_dict[req_name].append(sim_rel_tuple_to_add)
        else:
            req_dict[req_name] = [sim_rel_tuple_to_add]
    if self._print_false_negatives:
        self._print_false_negatives(sol_matrix_copy)
    return req_dict
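# Hypothetical helper (not part of the original code) showing how the
# (similarity, relevant) tuples from the dict above could feed the average
# precision calculation mentioned in the docstring.
def average_precision(sim_rel_tuples):
    # Rank code elements by descending similarity, then average the precision
    # values measured at each relevant hit.
    ranked = sorted(sim_rel_tuples, key=lambda t: t[0], reverse=True)
    hits, precisions = 0, []
    for rank, (_, relevant) in enumerate(ranked, start=1):
        if relevant:
            hits += 1
            precisions.append(hits / rank)
    return sum(precisions) / len(precisions) if precisions else 0.0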
def getPackagename(filename: str) -> str:
    retval = ""
    with open(filename) as f:
        in_comment = False
        for line in f.readlines():
            if retval:
                break
            tokenized = Util.splitToken(line)
            if tokenized:
                for i, token in enumerate(tokenized):
                    if token == "//" and not in_comment:
                        break
                    if token == "/*" and not in_comment:
                        in_comment = True
                        continue
                    elif token == "*/" and in_comment:
                        in_comment = False
                        continue
                    if token == "package" and not in_comment:
                        try:
                            buf = ""
                            token_ = tokenized[i + 1]
                            j = i + 1
                            while token_ != ";":
                                buf += token_
                                j += 1
                                token_ = tokenized[j]
                            retval = buf
                            break
                        except IndexError as e:
                            print("Error line: " + line.rstrip(os.linesep))
    return retval
def get_true_positives(self, trace_link_candidates):
    if not trace_link_candidates:
        log.debug("No Trace Link candidates!")
        return []
    valid_trace_links = []
    sol_matrix_copy = Util.deep_copy(self._solution_trace_matrix)
    false_positives_matrix = SolutionMatrix()
    for trace_link in trace_link_candidates:
        if sol_matrix_copy.contains_req_code_pair(trace_link.req_key, trace_link.code_key):
            # Remove correct trace links on the copy to avoid duplicate true positive counts
            sol_matrix_copy.remove_trace_pair(trace_link.req_key, trace_link.code_key)
            valid_trace_links.append(trace_link)
        elif self._print_false_positives:
            false_positives_matrix.add_trace_pair(trace_link.req_key, trace_link.code_key)
    if self._print_false_negatives:
        self._print_false_negatives(sol_matrix_copy)
    if self._print_false_positives:
        log.info("\n\nFalse Positives: {} Links, {} unique Reqs, {} unique Code".format(
            false_positives_matrix._number_of_trace_links,
            false_positives_matrix.num_unique_reqs(),
            false_positives_matrix.num_unique_code()))
        log.info("\n" + false_positives_matrix.print_str())
    return valid_trace_links
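# Sketch of how the true positives returned above could feed precision/recall.
# total_solution_links is a hypothetical count of all links in the gold standard;
# it is not provided by the original code.
def precision_recall(true_positives, trace_link_candidates, total_solution_links):
    precision = len(true_positives) / len(trace_link_candidates) if trace_link_candidates else 0.0
    recall = len(true_positives) / total_solution_links if total_solution_links else 0.0
    return precision, recall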
def __init__(self, return_type: IdentifierString, name: IdentifierString, comment: IdentifierString,
             body: IdentifierString, left_side_identifiers: IdentifierString,
             parameters: [Parameter] = [], line=None):
    self.return_type = return_type
    self.name = name
    self.original_name = Util.deep_copy(name)
    self.parameters = parameters
    self.original_parameters = Util.deep_copy(parameters)
    self.comment = comment
    self.body = body
    self.left_side_identifiers = left_side_identifiers
    self.line = line  # The line number in the code file where the method signature is written
    self.token_list = self._create_token_list()
def _create_embeddings(self, file_representation):
    chosen_word_groups = self._requirements_word_chooser.choose_words_from(file_representation)
    chosen_word_groups_embeddings = []
    requirement_element_vectors = []
    for word_group in chosen_word_groups:
        word_embeddings = self._create_word_embeddings_from_word_list(word_group)
        chosen_word_groups_embeddings.append(word_embeddings)
        requirement_element_vectors.append(Util.create_averaged_vector(word_embeddings))
    # flat average over all (nested) word embeddings in chosen_word_groups
    file_vector = Util.create_averaged_vector(
        [word for word_group in chosen_word_groups for word in word_group])
    return RequirementEmbeddingContainer(file_representation.file_path, file_vector,
                                         requirement_element_vectors)
def process(self, majority_drop_thresh) -> [TraceLink]:
    # Step 1: Calculate code element to (whole) req trace links
    trace_link_data_structure = self._element_level_trace_link_aggregator.process(
        Util.deep_copy(self._trace_link_data_structure))

    # Step 2: (optional) Update code element to (whole) req trace links according to call graph neighbors
    if self._callgraph_aggregator:
        trace_link_data_structure = self._callgraph_aggregator.process(trace_link_data_structure)

    # Step 3: Do the majority decision to obtain (whole) code file to (whole) req similarities
    return self._majority_decision.process(trace_link_data_structure, majority_drop_thresh)
def _do_majority_decision(self, code_file_name, votes, sims_per_req):
    voted_trace_links = []
    if votes:
        majority_ranked_dict, max_vote_count = Util.majority_count(votes)
        for req_file_name in majority_ranked_dict:
            if majority_ranked_dict[req_file_name] == max_vote_count:
                code_file_to_req_file_similarity = self._code_reduce_function(
                    sims_per_req[req_file_name])
                voted_trace_links.append(
                    TraceLink(req_file_name, code_file_name, code_file_to_req_file_similarity))
    else:
        log.debug(f"No votes for {code_file_name}")
    return voted_trace_links
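# Assumed sketch of Util.majority_count, inferred from its call site above:
# it appears to tally the votes per requirement and return the counts together
# with the highest count. The original Util implementation may differ.
from collections import Counter

def majority_count(votes):
    counts = Counter(votes)
    return dict(counts), max(counts.values())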
def __init__(self, name: IdentifierString, comment: IdentifierString, attributes: [Attribute] = [],
             methods: [Method] = [], inner_classifiers=[], extended_classifiers=[],
             implemented_classifiers=[], line=None):
    self.name = name
    self.original_name = Util.deep_copy(name)
    self.extended_classifiers = extended_classifiers
    self.implemented_classifiers = implemented_classifiers
    self.attributes = attributes
    self.methods = methods
    self.comment = comment
    self.inner_classifiers = inner_classifiers
    self.line = line
    self.token_list = self._create_token_list()
def getClassname(filename: str) -> list:
    retval = []
    with open(filename) as f:
        in_comment = False
        for line in f.readlines():
            tokenized = Util.splitToken(line)
            if tokenized:
                if tokenized[0] == "//":
                    continue
                for i, token in enumerate(tokenized):
                    if token == "//" and not in_comment:
                        break
                    if token == "/*" and not in_comment:
                        in_comment = True
                        continue
                    elif token == "*/" and in_comment:
                        in_comment = False
                        continue
                    if token == "class" and not in_comment:
                        try:
                            if not tokenized[i + 1] in Util.symbol:
                                retval.append(tokenized[i + 1])
                        except IndexError as e:
                            print("Error line: " + line.rstrip(os.linesep))
    return retval
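# Illustrative usage of getPackagename/getClassname. The Java source and file
# path are made up, and the exact output depends on how Util.splitToken
# tokenizes a line.
java_source = """package com.example.demo;

/* a block comment mentioning the word class */
public class Demo {
}
"""
with open("Demo.java", "w") as demo_file:
    demo_file.write(java_source)

print(getPackagename("Demo.java"))  # expected: "com.example.demo"
print(getClassname("Demo.java"))    # expected: ["Demo"]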
def _embedd_and_average(self, word_list):
    word_embd = self._create_word_embeddings_from_word_list(word_list)
    # Return as (possibly empty) lists to avoid an is-None check later on
    return [Util.create_averaged_vector(word_embd)] if word_embd else []
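# Several snippets rely on Util.create_averaged_vector; a plausible minimal
# sketch of such a helper (an assumption, the original Util module may differ):
# the element-wise mean of a list of equally sized embedding vectors.
import numpy as np

def create_averaged_vector(vectors):
    return np.mean(np.asarray(vectors), axis=0)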
def get_copy_of_similarity_matrix(self):
    return Util.deep_copy(self._similarity_matrix)
def _addTestNumber(self, ESTest_path):
    print("_addTestNumber start")
    try:
        source_lines = self._fileCopyAndMakeLineList(
            ESTest_path, path.join(self.getThisProjectPath(), "temp", "temp2.java"))
        # source_lines = self._removeComment(source_lines)
        # print(source_lines)
        with open(ESTest_path, mode='w') as f:
            func_dive = 0
            in_func = False
            objectname = []
            imported = False
            for line in source_lines:
                tokenized_line = Util.splitToken(line)
                # Start of a generated test method: echo the signature and inject a numbered marker
                if len(tokenized_line) >= 3:
                    if tokenized_line[0] == "public" and tokenized_line[1] == "void" \
                            and "test" in tokenized_line[2]:
                        test_number = tokenized_line[2].replace("test", "")
                        print(line, file=f)
                        print("System.out.println(\"ESTest_test[" + test_number + "]\");", file=f)
                        func_dive = 1
                        in_func = True
                        continue
                if in_func:
                    # Track brace depth to detect the end of the test method
                    func_dive += tokenized_line.count('{')
                    func_dive -= tokenized_line.count('}')
                    if len(tokenized_line) >= 2:
                        if getClassname(self.javasource_path)[0] == tokenized_line[0] \
                                and Util.isIdentifier(tokenized_line[1]):
                            objectname.append(tokenized_line[1])
                    if func_dive == 0:
                        in_func = False
                        # Dump the recorded objects' attributes just before the method's closing brace
                        for o in objectname:
                            print("System.out.println(\"FilallyObjectAttributes_start[" + o + "]\");", file=f)
                            print("try{System.out.println(new XStream().toXML(" + o +
                                  "));}catch(Exception e){e.printStackTrace();}", file=f)
                            print("System.out.println(\"FilallyObjectAttributes_end[" + o + "]\");", file=f)
                        objectname = []
                    print(line, file=f)
                    continue
                if not imported and "import" in tokenized_line:
                    print("import com.thoughtworks.xstream.XStream;", file=f)
                    imported = True
                print(line.replace("mockJVMNonDeterminism = true",
                                   "mockJVMNonDeterminism = false"), file=f)
    except:
        raise
import tensorflow as tf
import numpy as np
import sys

sys.path.append("..")
import utility.Util as Util

credit_data = Util.open_file("../data/credit_data.csv")

# create lists of feature types
numerical = ["Duration", 'InstallmentRatePecnt', 'PresentResidenceTime', 'Age']
target = ['CreditStatus']

# shuffle the dataset
credit_data = credit_data.sample(frac=1).reset_index(drop=True)
train_x, train_y = Util.pre_process_data(credit_data, numerical, target)

# divide the dataset into training and test sets
x_train, y_train, x_test, y_test = Util.split_data(0.8, train_x, train_y)

# network dimensions: a single hidden layer
n_hidden_1 = 8
n_input = train_x.shape[1]
n_classes = train_y.shape[1]

weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
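# The snippet stops after defining the weights and biases. A typical TF1-style
# continuation would build the forward pass and a training op on top of them;
# this is a sketch under that assumption (the placeholder names, learning rate,
# and one-hot label encoding are not taken from the original script).
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])

# single hidden layer with ReLU, linear output layer (logits)
hidden = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
logits = tf.add(tf.matmul(hidden, weights['out']), biases['out'])

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)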
import pandas as pd
import tensorflow as tf
import numpy as np
import sys

sys.path.append("..")
import utility.Util as Util

credit_data = Util.open_file("../data/credit_data.csv")

# create lists of feature types
numerical = ["Duration", 'InstallmentRatePecnt', 'PresentResidenceTime', 'Age']
# categorical = ["CheckingAcctStat", "CreditHistory", "Purpose", 'Savings', 'Employment', 'Property', 'Telephone']
target = ['CreditStatus']

positive_class, negative_class = Util.decompose_classes(credit_data, 'CreditStatus')

# get numerical, categorical and label columns for each class
positive_numerical, positive_target = Util.pre_process_data(positive_class, numerical, target)
negative_numerical, negative_target = Util.pre_process_data(negative_class, numerical, target)

# cluster the data and get the cluster labels
positive_cluster = Util.cluster_data(positive_numerical)  # .join(positive_categorical)
negative_cluster = Util.cluster_data(negative_numerical)
# offset the negative-class cluster labels (presumably to keep them distinct from the positive-class ids)
negative_cluster = np.array([x + 3 for x in negative_cluster])

# give the new cluster label column a name
positive_cluster_labels = pd.DataFrame(positive_cluster,
def set_params(self, param_list):
    self.parameters = param_list
    self.original_parameters = Util.deep_copy(param_list)