def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_tuple' function.

    Every value passed here is a tuple (empty, single element, same-typed
    and mixed-typed elements), and for each of them the check is expected
    to return None.
    """

    # Identity comparison: 'is None' cannot be fooled by a custom __eq__.
    assert auxiliary.check_is_tuple('TestArgument', ()) is None
    assert auxiliary.check_is_tuple('TestArgument', (1,)) is None
    assert auxiliary.check_is_tuple('TestArgument', ('a',)) is None
    assert auxiliary.check_is_tuple('TestArgument', ('a', 'b')) is None
    assert auxiliary.check_is_tuple('TestArgument', (42, 'b')) is None
def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_tuple' function.

    Every value passed here is a tuple (empty, single element, same-typed
    and mixed-typed elements), and for each of them the check is expected
    to return None.
    """

    # Identity comparison: 'is None' cannot be fooled by a custom __eq__.
    assert auxiliary.check_is_tuple("TestArgument", ()) is None
    assert auxiliary.check_is_tuple("TestArgument", (1,)) is None
    assert auxiliary.check_is_tuple("TestArgument", ("a",)) is None
    assert auxiliary.check_is_tuple("TestArgument", ("a", "b")) is None
    assert auxiliary.check_is_tuple("TestArgument", (42, "b")) is None
def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_tuple' function.

    Every value passed here is a tuple (empty, single element, same-typed
    and mixed-typed elements), and for each of them the check is expected
    to return None.
    """

    # NOTE(review): the bare 'assert check(...)' form tested for a truthy
    # return value, which fails for a validator that returns None on
    # success - confirm against auxiliary.check_is_tuple.
    assert auxiliary.check_is_tuple("TestArgument", ()) is None
    assert auxiliary.check_is_tuple("TestArgument", (1,)) is None
    assert auxiliary.check_is_tuple("TestArgument", ("a",)) is None
    assert auxiliary.check_is_tuple("TestArgument", ("a", "b")) is None
    assert auxiliary.check_is_tuple("TestArgument", (42, "b")) is None
def GenerateHistogram(w_vec_dict, bin_width, file_name=None, match_sets=None):
    """Build a text histogram of summed weight vectors and optionally save it.

    The histogram is rotated 90 degrees clockwise, i.e. it runs from top to
    bottom instead of left to right: one output line per bin, in ascending
    bin order, with a bar of '*' characters whose length is proportional to
    the number of weight vectors in that bin. The elements of each weight
    vector are summed and the vector is counted in the bin starting at
    'w_sum - (w_sum % bin_width)'.

    Arguments:
      w_vec_dict  Dictionary with weight vectors (sequences of numbers) as
                  values. Its keys are the identifiers looked up in the
                  match sets. The dictionary is not modified.
      bin_width   Positive number, the width of one histogram bin.
      file_name   If not None, name of a text file the histogram lines are
                  written to (an existing file is overwritten).
      match_sets  If not None, a tuple of three sets: matches, non-matches
                  and possible matches. Their summed sizes must equal the
                  size of the weight vector dictionary. A key found in
                  neither of the first two sets is counted as a possible
                  match.

    Each line starts with the number of weight vectors in the bin, or, when
    match sets are given, with the number of matches, non-matches and (only
    if at least one was counted) possible matches, followed by the bin
    value and the bar.

    Returns a list of text strings making up the histogram (terminated by
    an empty string), or an empty list if the weight vector dictionary is
    empty.

    Raises Exception if the match sets tuple does not contain three
    elements or the set sizes do not add up, and IOError if the output
    file cannot be opened. The auxiliary.check_* calls presumably reject
    arguments of the wrong type - see the auxiliary module.
    """

    MAX_HISTO_WIDTH = 80  # Maximum width of one histogram line in characters
    COUNT_COL_WIDTH = 11  # Width of one '%9d |' count column
    W_SUM_COL_WIDTH = 8   # Width of the ' %5.2f |' bin value column

    auxiliary.check_is_dictionary('w_vec_dict', w_vec_dict)
    auxiliary.check_is_number('bin_width', bin_width)
    auxiliary.check_is_positive('bin_width', bin_width)
    if file_name is not None:
        auxiliary.check_is_string('file_name', file_name)

    if match_sets is not None:
        auxiliary.check_is_tuple('match_sets', match_sets)
        if len(match_sets) != 3:
            err_msg = 'Match sets must be a tuple containing three sets.'
            logging.error(err_msg)
            raise Exception(err_msg)
        auxiliary.check_is_set('match_sets[0]', match_sets[0])
        auxiliary.check_is_set('match_sets[1]', match_sets[1])
        auxiliary.check_is_set('match_sets[2]', match_sets[2])

        num_classified = (len(match_sets[0]) + len(match_sets[1]) +
                          len(match_sets[2]))
        if len(w_vec_dict) != num_classified:
            err_msg = ('Lengths of weight vector dictionary differs from ' +
                       'summed lengths of match sets.')
            logging.error(err_msg)
            raise Exception(err_msg)

    # Nothing to draw for an empty weight vector dictionary
    #
    if not w_vec_dict:
        logging.warning('Empty weight vector dictionary given for histogram ' +
                        'generation')
        return []

    histo_dict = {}  # Combined histogram: bin value -> number of vectors
    match_histo_dict = {}  # Per class histograms, only filled if match sets
    non_match_histo_dict = {}  # are given
    poss_match_histo_dict = {}

    # Loop over weight vectors and count them per bin - - - - - - - - - - - -
    #
    for (rec_id_tuple, w_vec) in w_vec_dict.items():
        w_sum = sum(w_vec)  # Sum all weight vector elements
        binned_w = w_sum - (w_sum % bin_width)  # Lower boundary of the bin
        histo_dict[binned_w] = histo_dict.get(binned_w, 0) + 1

        if match_sets is None:
            continue

        if rec_id_tuple in match_sets[0]:
            class_histo_dict = match_histo_dict
        elif rec_id_tuple in match_sets[1]:
            class_histo_dict = non_match_histo_dict
        else:  # Everything else is counted as a possible match
            class_histo_dict = poss_match_histo_dict
        class_histo_dict[binned_w] = class_histo_dict.get(binned_w, 0) + 1

    max_bin_w_count = max(histo_dict.values())  # Count in the fullest bin

    # Select the count columns to show: a single total count column without
    # match sets, otherwise one column per class (the possible match column
    # only if at least one possible match was counted). Titles are 10
    # characters wide so that together with '|' they fill one count column.
    #
    if match_sets is None:
        class_columns = []
    elif not poss_match_histo_dict:
        class_columns = [('  Match   ', match_histo_dict),
                         (' Non-Match', non_match_histo_dict)]
    else:
        class_columns = [('  Match   ', match_histo_dict),
                         (' Non-Match', non_match_histo_dict),
                         ('Poss-Match', poss_match_histo_dict)]

    num_count_cols = max(1, len(class_columns))
    table_width = num_count_cols * COUNT_COL_WIDTH + W_SUM_COL_WIDTH

    # The fullest bin gets a bar that fills the rest of the line (61, 50 or
    # 39 characters for one, two or three count columns)
    #
    scale_factor_y = float(MAX_HISTO_WIDTH - table_width) / max_bin_w_count

    # Generate the histogram as a list of strings - - - - - - - - - - - - - -
    #
    histo_list = ['Weight histogram:', '-----------------']

    counts_title = 'Counts'.center(num_count_cols * COUNT_COL_WIDTH - 1) + '|'
    if class_columns:
        histo_list.append(counts_title)
        histo_list.append(''.join([title + '|' for (title, _) in
                                   class_columns]) + ' w_sum |')
    else:
        histo_list.append(counts_title + ' w_sum |')
    histo_list.append('-' * table_width)

    for x_val in sorted(histo_dict):  # Ascending bin values
        this_count = histo_dict[x_val]

        if class_columns:
            row_counts = [col_dict.get(x_val, 0) for (_, col_dict) in
                          class_columns]
        else:
            row_counts = [this_count]

        line_str = ''.join(['%9d |' % (count) for count in row_counts])
        line_str += ' %5.2f |' % (x_val)
        line_str += '*' * int(this_count * scale_factor_y)  # Always total
        histo_list.append(line_str)

    histo_list.append('')

    # If a file name is given write the histogram into it - - - - - - - - - -
    #
    if file_name is not None:
        try:
            out_file = open(file_name, 'w')
        except IOError:
            logging.exception('Cannot open file "%s" for writing' %
                              (str(file_name)))
            raise  # Keep the original error details for the caller

        try:
            # The file is opened in text mode, which translates '\n' into
            # the platform line separator itself
            for line in histo_list:
                out_file.write(line + '\n')
        finally:
            out_file.close()
        logging.info('Histogram written to file: %s' % (file_name))

    if match_sets is not None:
        logging.debug('Match histogram bins: %s' %
                      (sorted(match_histo_dict.items())))
        logging.debug('Non-match histogram bins: %s' %
                      (sorted(non_match_histo_dict.items())))

    return histo_list
def GenerateHistogram(w_vec_dict, bin_width, file_name=None, match_sets=None):
    """Build a text histogram of summed weight vectors and optionally save it.

    The histogram is rotated 90 degrees clockwise, i.e. it runs from top to
    bottom instead of left to right: one output line per bin, in ascending
    bin order, with a bar of '*' characters whose length is proportional to
    the number of weight vectors in that bin. The elements of each weight
    vector are summed and the vector is counted in the bin starting at
    'w_sum - (w_sum % bin_width)'.

    Arguments:
      w_vec_dict  Dictionary with weight vectors (sequences of numbers) as
                  values. Its keys are the identifiers looked up in the
                  match sets. The dictionary is not modified.
      bin_width   Positive number, the width of one histogram bin.
      file_name   If not None, name of a text file the histogram lines are
                  written to (an existing file is overwritten).
      match_sets  If not None, a tuple of three sets: matches, non-matches
                  and possible matches. Their summed sizes must equal the
                  size of the weight vector dictionary. A key found in
                  neither of the first two sets is counted as a possible
                  match.

    Each line starts with the number of weight vectors in the bin, or, when
    match sets are given, with the number of matches, non-matches and (only
    if at least one was counted) possible matches, followed by the bin
    value and the bar.

    Returns a list of text strings making up the histogram (terminated by
    an empty string), or an empty list if the weight vector dictionary is
    empty.

    Raises Exception if the match sets tuple does not contain three
    elements or the set sizes do not add up, and IOError if the output
    file cannot be opened. The auxiliary.check_* calls presumably reject
    arguments of the wrong type - see the auxiliary module.
    """

    MAX_HISTO_WIDTH = 80  # Maximum width of one histogram line in characters
    COUNT_COL_WIDTH = 11  # Width of one '%9d |' count column
    W_SUM_COL_WIDTH = 8   # Width of the ' %5.2f |' bin value column

    auxiliary.check_is_dictionary('w_vec_dict', w_vec_dict)
    auxiliary.check_is_number('bin_width', bin_width)
    auxiliary.check_is_positive('bin_width', bin_width)
    if file_name is not None:
        auxiliary.check_is_string('file_name', file_name)

    if match_sets is not None:
        auxiliary.check_is_tuple('match_sets', match_sets)
        if len(match_sets) != 3:
            err_msg = 'Match sets must be a tuple containing three sets.'
            logging.error(err_msg)
            raise Exception(err_msg)
        auxiliary.check_is_set('match_sets[0]', match_sets[0])
        auxiliary.check_is_set('match_sets[1]', match_sets[1])
        auxiliary.check_is_set('match_sets[2]', match_sets[2])

        num_classified = (len(match_sets[0]) + len(match_sets[1]) +
                          len(match_sets[2]))
        if len(w_vec_dict) != num_classified:
            err_msg = ('Lengths of weight vector dictionary differs from ' +
                       'summed lengths of match sets.')
            logging.error(err_msg)
            raise Exception(err_msg)

    # Nothing to draw for an empty weight vector dictionary
    #
    if not w_vec_dict:
        logging.warning('Empty weight vector dictionary given for histogram ' +
                        'generation')
        return []

    histo_dict = {}  # Combined histogram: bin value -> number of vectors
    match_histo_dict = {}  # Per class histograms, only filled if match sets
    non_match_histo_dict = {}  # are given
    poss_match_histo_dict = {}

    # Loop over weight vectors and count them per bin - - - - - - - - - - - -
    #
    for (rec_id_tuple, w_vec) in w_vec_dict.items():
        w_sum = sum(w_vec)  # Sum all weight vector elements
        binned_w = w_sum - (w_sum % bin_width)  # Lower boundary of the bin
        histo_dict[binned_w] = histo_dict.get(binned_w, 0) + 1

        if match_sets is None:
            continue

        if rec_id_tuple in match_sets[0]:
            class_histo_dict = match_histo_dict
        elif rec_id_tuple in match_sets[1]:
            class_histo_dict = non_match_histo_dict
        else:  # Everything else is counted as a possible match
            class_histo_dict = poss_match_histo_dict
        class_histo_dict[binned_w] = class_histo_dict.get(binned_w, 0) + 1

    max_bin_w_count = max(histo_dict.values())  # Count in the fullest bin

    # Select the count columns to show: a single total count column without
    # match sets, otherwise one column per class (the possible match column
    # only if at least one possible match was counted). Titles are 10
    # characters wide so that together with '|' they fill one count column.
    #
    if match_sets is None:
        class_columns = []
    elif not poss_match_histo_dict:
        class_columns = [('  Match   ', match_histo_dict),
                         (' Non-Match', non_match_histo_dict)]
    else:
        class_columns = [('  Match   ', match_histo_dict),
                         (' Non-Match', non_match_histo_dict),
                         ('Poss-Match', poss_match_histo_dict)]

    num_count_cols = max(1, len(class_columns))
    table_width = num_count_cols * COUNT_COL_WIDTH + W_SUM_COL_WIDTH

    # The fullest bin gets a bar that fills the rest of the line (61, 50 or
    # 39 characters for one, two or three count columns)
    #
    scale_factor_y = float(MAX_HISTO_WIDTH - table_width) / max_bin_w_count

    # Generate the histogram as a list of strings - - - - - - - - - - - - - -
    #
    histo_list = ['Weight histogram:', '-----------------']

    counts_title = 'Counts'.center(num_count_cols * COUNT_COL_WIDTH - 1) + '|'
    if class_columns:
        histo_list.append(counts_title)
        histo_list.append(''.join([title + '|' for (title, _) in
                                   class_columns]) + ' w_sum |')
    else:
        histo_list.append(counts_title + ' w_sum |')
    histo_list.append('-' * table_width)

    for x_val in sorted(histo_dict):  # Ascending bin values
        this_count = histo_dict[x_val]

        if class_columns:
            row_counts = [col_dict.get(x_val, 0) for (_, col_dict) in
                          class_columns]
        else:
            row_counts = [this_count]

        line_str = ''.join(['%9d |' % (count) for count in row_counts])
        line_str += ' %5.2f |' % (x_val)
        line_str += '*' * int(this_count * scale_factor_y)  # Always total
        histo_list.append(line_str)

    histo_list.append('')

    # If a file name is given write the histogram into it - - - - - - - - - -
    #
    if file_name is not None:
        try:
            out_file = open(file_name, 'w')
        except IOError:
            logging.exception('Cannot open file "%s" for writing' %
                              (str(file_name)))
            raise  # Keep the original error details for the caller

        try:
            # The file is opened in text mode, which translates '\n' into
            # the platform line separator itself
            for line in histo_list:
                out_file.write(line + '\n')
        finally:
            out_file.close()
        logging.info('Histogram written to file: %s' % (file_name))

    if match_sets is not None:
        logging.debug('Match histogram bins: %s' %
                      (sorted(match_histo_dict.items())))
        logging.debug('Non-match histogram bins: %s' %
                      (sorted(non_match_histo_dict.items())))

    return histo_list