示例#1
0
  def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_tuple' function.

       Every valid tuple (empty, one element, several elements, mixed element
       types) has to be accepted, which is asserted here as the check
       returning None.
    """

    valid_tuples = [(), (1,), ('a',), ('a','b'), (42,'b')]

    for valid_tuple in valid_tuples:
      # Identity test, so that only the None singleton itself is accepted (an
      # object with a permissive '__eq__' method would also pass '== None')
      #
      assert auxiliary.check_is_tuple('TestArgument', valid_tuple) is None, \
             'check_is_tuple did not return None for: %r' % (valid_tuple,)
示例#2
0
  def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
    """Test 'check_is_tuple' function.

       Every valid tuple (empty, one element, several elements, mixed element
       types) has to be accepted, which is asserted here as the check
       returning None.
    """

    valid_tuples = [(), (1,), ('a',), ('a','b'), (42,'b')]

    for valid_tuple in valid_tuples:
      # Identity test, so that only the None singleton itself is accepted (an
      # object with a permissive '__eq__' method would also pass '== None')
      #
      assert auxiliary.check_is_tuple('TestArgument', valid_tuple) is None, \
             'check_is_tuple did not return None for: %r' % (valid_tuple,)
示例#3
0
    def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - - -
        """Test 'check_is_tuple' function.

        Every valid tuple (empty, one element, several elements, mixed element
        types) has to be accepted, which is asserted here as the check
        returning None.
        """

        valid_tuples = [(), (1,), ("a",), ("a", "b"), (42, "b")]

        for valid_tuple in valid_tuples:
            # Identity test, so that only the None singleton itself is accepted
            # (an object with a permissive '__eq__' would also pass '== None')
            result = auxiliary.check_is_tuple("TestArgument", valid_tuple)
            assert result is None, (
                "check_is_tuple did not return None for: %r" % (valid_tuple,)
            )
示例#4
0
    def testIsTuple(self):  # - - - - - - - - - - - - - - - - - - - - - - - - -
        """Test 'check_is_tuple' function.

        Every valid tuple (empty, one element, several elements, mixed element
        types) has to be accepted by the check.

        NOTE(review): acceptance is asserted as a None return value, because a
        bare 'assert check(...)' can never pass for a check that returns
        nothing on success - confirm against 'auxiliary.check_is_tuple'.
        """

        valid_tuples = [(), (1, ), ("a", ), ("a", "b"), (42, "b")]

        for valid_tuple in valid_tuples:
            # Compare against None explicitly instead of testing truthiness
            result = auxiliary.check_is_tuple("TestArgument", valid_tuple)
            assert result is None, (
                "check_is_tuple did not return None for: %r" % (valid_tuple, ))
示例#5
0
def GenerateHistogram(w_vec_dict, bin_width, file_name=None, match_sets=None):
  """Generate and/or save a histogram of the weight vectors stored in the
     given dictionary, and according to the match sets (if given).

     The histogram is rotated 90 degrees clockwise, i.e. up to down instead of
     left to right: one text line per bin, in increasing order of the bin
     value.

     For each weight vector the sum of its elements is calculated and rounded
     down to a multiple of the bin width, which gives the bin this weight
     vector is counted in. Each line shows the count(s) for the bin, the bin
     value, and a bar of '*' characters. Bars are scaled so that the bar of
     the fullest bin takes up what is left of an 80 character line after the
     count and bin value columns.

     Arguments:
       w_vec_dict  Dictionary with weight vectors (sequences of numbers) as
                   values. It is only read, not modified.
       bin_width   Positive number, the width of one histogram bin.
       file_name   If given, the histogram lines will be written into this
                   text file (an existing file is overwritten).
       match_sets  If given, this must be a tuple containing three sets, the
                   first being a set with matches, the second with
                   non-matches, and the third with possible matches, as
                   generated by classifiers in the classification.py Febrl
                   module. The set elements are keys of 'w_vec_dict', and the
                   summed sizes of the sets must equal its size. A key found
                   in neither of the first two sets is counted as a possible
                   match.

     If match sets are given, the total count column is replaced by separate
     columns with the number of matches and non-matches in each bin (and of
     possible matches, if there are any).

     The arguments are first passed through the checks in the 'auxiliary'
     module. An Exception is raised if the match sets are not three sets
     matching the dictionary size, and an IOError if the output file cannot
     be opened.

     This function returns a list containing the histogram as text strings
     (without line ends), or an empty list if the dictionary is empty.
  """

  MAX_HISTO_WIDTH = 80  # maximum width in characters

  auxiliary.check_is_dictionary('w_vec_dict', w_vec_dict)
  auxiliary.check_is_number('bin_width', bin_width)
  auxiliary.check_is_positive('bin_width', bin_width)
  if (file_name is not None):
    auxiliary.check_is_string('file_name', file_name)
  if (match_sets is not None):
    auxiliary.check_is_tuple('match_sets', match_sets)
    if (len(match_sets) != 3):
      err_msg = 'Match sets must be a tuple containing three sets.'
      logging.error(err_msg)  # No active exception, so not logging.exception
      raise Exception(err_msg)
    auxiliary.check_is_set('match_sets[0]', match_sets[0])
    auxiliary.check_is_set('match_sets[1]', match_sets[1])
    auxiliary.check_is_set('match_sets[2]', match_sets[2])
    if (len(w_vec_dict) != (len(match_sets[0]) + len(match_sets[1]) + \
                            len(match_sets[2]))):
      err_msg = 'Lengths of weight vector dictionary differs from ' + \
                'summed lengths of match sets.'
      logging.error(err_msg)
      raise Exception(err_msg)

  # Check if weight vector dictionary is empty, if so return empty list
  #
  if (not w_vec_dict):
    logging.warning('Empty weight vector dictionary given for histogram ' + \
                    'generation')
    return []

  histo_dict = {}  # A combined histogram dictionary (bin value -> count)

  # Per class histograms, these are only filled if match sets are given
  #
  match_histo_dict =      {}
  non_match_histo_dict =  {}
  poss_match_histo_dict = {}

  # Loop over weight vectors - - - - - - - - - - - - - - - - - - - - - - - - -
  #
  for rec_id_tuple in w_vec_dict:

    w_sum = sum(w_vec_dict[rec_id_tuple])  # Sum all weight vector elements
    binned_w = w_sum - (w_sum % bin_width)  # Round down to start of its bin

    histo_dict[binned_w] = histo_dict.get(binned_w, 0) + 1

    if (match_sets is not None):
      if (rec_id_tuple in match_sets[0]):
        class_histo_dict = match_histo_dict
      elif (rec_id_tuple in match_sets[1]):
        class_histo_dict = non_match_histo_dict
      else:  # A possible match
        class_histo_dict = poss_match_histo_dict

      class_histo_dict[binned_w] = class_histo_dict.get(binned_w, 0) + 1

  assert sum(histo_dict.values()) == len(w_vec_dict)

  max_bin_w_count = max(histo_dict.values())  # At least 1 (dict not empty)

  # Sort histogram according to X axis values - - - - - - - - - - - - - - - - -
  #
  x_vals = sorted(histo_dict)

  # Select the layout: the header lines, the dictionaries that provide the
  # count columns, and the width of these columns plus the w_sum column
  #
  if (match_sets is None):
    header_list = ['  Counts  | w_sum |',
                   '-------------------']
    count_dict_list = [histo_dict]
    column_width = 19
  elif (len(poss_match_histo_dict) == 0):  # No possible matches
    header_list = ['       Counts        |',
                   '  Match   | Non-Match| w_sum |',
                   '------------------------------']
    count_dict_list = [match_histo_dict, non_match_histo_dict]
    column_width = 30
  else:  # Also possible matches
    header_list = ['              Counts            |',
                   '  Match   | Non-Match|Poss-Match| w_sum |',
                   '-----------------------------------------']
    count_dict_list = [match_histo_dict, non_match_histo_dict,
                       poss_match_histo_dict]
    column_width = 41

  # The fullest bin gets a bar that fills the remaining characters in a line
  #
  scale_factor_y = float(MAX_HISTO_WIDTH - column_width) / max_bin_w_count

  # Generate the histogram as a list of strings - - - - - - - - - - - - - - - -
  #
  histo_list = []
  histo_list.append('Weight histogram:')
  histo_list.append('-----------------')
  histo_list += header_list

  for x_val in x_vals:
    count_str_list = ['%9d' % (count_dict.get(x_val, 0)) \
                      for count_dict in count_dict_list]

    line_str = ' |'.join(count_str_list) + ' | %5.2f |' % (x_val)

    # The bar length is always based on the total count in this bin
    #
    line_str += '*'*int(histo_dict[x_val]*scale_factor_y)
    histo_list.append(line_str)

  histo_list.append('')

  # If a file name is given open it for writing - - - - - - - - - - - - - - - -
  #
  if (file_name is not None):
    try:
      f = open(file_name, 'w')
    except Exception:  # Any failure to open is reported as an IOError
      err_msg = 'Cannot open file "%s" for writing' % (str(file_name))
      logging.exception(err_msg)
      raise IOError(err_msg)

    try:
      for line in histo_list:
        # The file is in text mode, which translates '\n' into the line end
        # of the platform (writing os.linesep would double the '\r')
        #
        f.write(line + '\n')
    finally:
      f.close()  # Also close the file if writing into it failed

    logging.info('Histogram written to file: %s' % (file_name))

  if (match_sets is not None):  # Diagnostics only, so keep them off stdout
    logging.debug('Match histogram bins: %s' % \
                  (str(sorted(match_histo_dict.items()))))
    logging.debug('Non-match histogram bins: %s' % \
                  (str(sorted(non_match_histo_dict.items()))))

  return histo_list
def GenerateHistogram(w_vec_dict, bin_width, file_name=None, match_sets=None):
    """Generate and/or save a histogram of the weight vectors stored in the
    given dictionary, and according to the match sets (if given).

    The histogram is rotated 90 degrees clockwise, i.e. up to down instead of
    left to right: one text line per bin, in increasing order of the bin
    value.

    For each weight vector the sum of its elements is calculated and rounded
    down to a multiple of the bin width, which gives the bin this weight
    vector is counted in. Each line shows the count(s) for the bin, the bin
    value, and a bar of '*' characters. Bars are scaled so that the bar of
    the fullest bin takes up what is left of an 80 character line after the
    count and bin value columns.

    Arguments:
      w_vec_dict  Dictionary with weight vectors (sequences of numbers) as
                  values. It is only read, not modified.
      bin_width   Positive number, the width of one histogram bin.
      file_name   If given, the histogram lines will be written into this
                  text file (an existing file is overwritten).
      match_sets  If given, this must be a tuple containing three sets, the
                  first being a set with matches, the second with
                  non-matches, and the third with possible matches, as
                  generated by classifiers in the classification.py Febrl
                  module. The set elements are keys of 'w_vec_dict', and the
                  summed sizes of the sets must equal its size. A key found
                  in neither of the first two sets is counted as a possible
                  match.

    If match sets are given, the total count column is replaced by separate
    columns with the number of matches and non-matches in each bin (and of
    possible matches, if there are any).

    The arguments are first passed through the checks in the 'auxiliary'
    module. An Exception is raised if the match sets are not three sets
    matching the dictionary size, and an IOError if the output file cannot
    be opened.

    This function returns a list containing the histogram as text strings
    (without line ends), or an empty list if the dictionary is empty.
    """

    MAX_HISTO_WIDTH = 80  # maximum width in characters

    auxiliary.check_is_dictionary('w_vec_dict', w_vec_dict)
    auxiliary.check_is_number('bin_width', bin_width)
    auxiliary.check_is_positive('bin_width', bin_width)
    if (file_name is not None):
        auxiliary.check_is_string('file_name', file_name)
    if (match_sets is not None):
        auxiliary.check_is_tuple('match_sets', match_sets)
        if (len(match_sets) != 3):
            err_msg = 'Match sets must be a tuple containing three sets.'
            # No active exception here, so logging.exception is not suitable
            logging.error(err_msg)
            raise Exception(err_msg)
        auxiliary.check_is_set('match_sets[0]', match_sets[0])
        auxiliary.check_is_set('match_sets[1]', match_sets[1])
        auxiliary.check_is_set('match_sets[2]', match_sets[2])
        if (len(w_vec_dict) != (len(match_sets[0]) + len(match_sets[1]) +
                                len(match_sets[2]))):
            err_msg = ('Lengths of weight vector dictionary differs from ' +
                       'summed lengths of match sets.')
            logging.error(err_msg)
            raise Exception(err_msg)

    # Check if weight vector dictionary is empty, if so return empty list
    #
    if (not w_vec_dict):
        logging.warning('Empty weight vector dictionary given for ' +
                        'histogram generation')
        return []

    histo_dict = {}  # A combined histogram dictionary (bin value -> count)

    # Per class histograms, these are only filled if match sets are given
    #
    match_histo_dict = {}
    non_match_histo_dict = {}
    poss_match_histo_dict = {}

    # Loop over weight vectors - - - - - - - - - - - - - - - - - - - - - - - -
    #
    for rec_id_tuple in w_vec_dict:

        w_sum = sum(w_vec_dict[rec_id_tuple])  # Sum all vector elements
        binned_w = w_sum - (w_sum % bin_width)  # Round down to start of bin

        histo_dict[binned_w] = histo_dict.get(binned_w, 0) + 1

        if (match_sets is not None):
            if (rec_id_tuple in match_sets[0]):
                class_histo_dict = match_histo_dict
            elif (rec_id_tuple in match_sets[1]):
                class_histo_dict = non_match_histo_dict
            else:  # A possible match
                class_histo_dict = poss_match_histo_dict

            class_histo_dict[binned_w] = class_histo_dict.get(binned_w, 0) + 1

    assert sum(histo_dict.values()) == len(w_vec_dict)

    max_bin_w_count = max(histo_dict.values())  # At least 1 (dict not empty)

    # Sort histogram according to X axis values - - - - - - - - - - - - - - - -
    #
    x_vals = sorted(histo_dict)

    # Select the layout: the header lines, the dictionaries that provide the
    # count columns, and the width of these columns plus the w_sum column
    #
    if (match_sets is None):
        header_list = ['  Counts  | w_sum |',
                       '-------------------']
        count_dict_list = [histo_dict]
        column_width = 19
    elif (len(poss_match_histo_dict) == 0):  # No possible matches
        header_list = ['       Counts        |',
                       '  Match   | Non-Match| w_sum |',
                       '------------------------------']
        count_dict_list = [match_histo_dict, non_match_histo_dict]
        column_width = 30
    else:  # Also possible matches
        header_list = ['              Counts            |',
                       '  Match   | Non-Match|Poss-Match| w_sum |',
                       '-----------------------------------------']
        count_dict_list = [match_histo_dict, non_match_histo_dict,
                           poss_match_histo_dict]
        column_width = 41

    # The fullest bin gets a bar that fills the remaining characters in a line
    #
    scale_factor_y = float(MAX_HISTO_WIDTH - column_width) / max_bin_w_count

    # Generate the histogram as a list of strings - - - - - - - - - - - - - - -
    #
    histo_list = []
    histo_list.append('Weight histogram:')
    histo_list.append('-----------------')
    histo_list += header_list

    for x_val in x_vals:
        count_str_list = ['%9d' % (count_dict.get(x_val, 0))
                          for count_dict in count_dict_list]

        line_str = ' |'.join(count_str_list) + ' | %5.2f |' % (x_val)

        # The bar length is always based on the total count in this bin
        #
        line_str += '*' * int(histo_dict[x_val] * scale_factor_y)
        histo_list.append(line_str)

    histo_list.append('')

    # If a file name is given open it for writing - - - - - - - - - - - - - - -
    #
    if (file_name is not None):
        try:
            f = open(file_name, 'w')
        except Exception:  # Any failure to open is reported as an IOError
            err_msg = 'Cannot open file "%s" for writing' % (str(file_name))
            logging.exception(err_msg)
            raise IOError(err_msg)

        try:
            for line in histo_list:
                # The file is in text mode, which translates '\n' into the
                # line end of the platform (writing os.linesep would double
                # the '\r')
                #
                f.write(line + '\n')
        finally:
            f.close()  # Also close the file if writing into it failed

        logging.info('Histogram written to file: %s' % (file_name))

    if (match_sets is not None):  # Diagnostics only, keep them off stdout
        logging.debug('Match histogram bins: %s' %
                      (str(sorted(match_histo_dict.items()))))
        logging.debug('Non-match histogram bins: %s' %
                      (str(sorted(non_match_histo_dict.items()))))

    return histo_list