Example #1
import math  # needed for the base-2 logarithm below; `model` is the project's data-access module

def entropy(objects, question):
    '''Returns an entropy value. This algorithm for entropy is heavily modeled
       on the ID3 decision tree algorithm for entropy. The difference is that here,
       we want what would traditionally be a high entropy. To adjust for this,
       we take the reciprocal of entropy before returning it.'''

    objects = tuple(objects)  # necessary for the SQL IN statement to work
    positives = float(model.get_num_positives(objects, question.id))
    negatives = float(model.get_num_negatives(objects, question.id))
    total = len(objects)

    # Standard ID3-style entropy terms; a class with a zero count contributes nothing.
    if positives != 0:
        frac_positives = -positives / total * math.log(positives / total, 2)
    else:
        frac_positives = 0
    if negatives != 0:
        frac_negatives = -negatives / total * math.log(negatives / total, 2)
    else:
        frac_negatives = 0

    entropy = frac_positives + frac_negatives

    entropy *= (positives + negatives) / total  # weight by the fraction of objects with a yes/no answer

    # Callers minimize this score, so invert it: high entropy maps to a low value,
    # and zero entropy maps to infinity.
    if entropy != 0:
        entropy = 1 / entropy
    else:
        entropy = float('inf')

    return entropy
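
A minimal usage sketch follows. The StubModel class, the Question namedtuple, and the hard-coded answer counts are hypothetical stand-ins for the project's real model module and question objects (not shown in this example); they only illustrate that an even yes/no split receives the lowest score.

# Hypothetical usage sketch: StubModel and Question stand in for the project's
# real model module and question objects, which are not part of this listing.
from collections import namedtuple

Question = namedtuple('Question', 'id')

class StubModel(object):
    # question_id -> (positives, negatives); question 1 splits the 4 objects evenly,
    # question 2 does not
    counts = {1: (2, 2), 2: (4, 0)}
    def get_num_positives(self, objects, question_id):
        return self.counts[question_id][0]
    def get_num_negatives(self, objects, question_id):
        return self.counts[question_id][1]

model = StubModel()                       # entropy() looks up `model` as a module-level name
objects = ['cat', 'dog', 'fish', 'bird']

for qid in (1, 2):
    print(qid, entropy(objects, Question(id=qid)))
# question 1 (2 yes / 2 no) scores 1.0, the minimum, so a caller minimizing this
# score would prefer it; question 2 (4 yes / 0 no) scores inf (raw entropy of 0).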
Example #2
def simple_entropy(objects, question):
    '''Returns an entropy value for a question based on the weights for all the
       objects. Entropy is low if for a given question, the number of yes and no
       answers is about even, and the number of unsure answers is low.'''

    objects = tuple(objects)  # necessary for the SQL IN statement to work
    positives = model.get_num_positives(objects, question.id)
    negatives = model.get_num_negatives(objects, question.id)
    unknowns = model.get_num_unknowns(objects, question.id)

    # Imbalance between yes and no answers; 0 when the split is perfectly even.
    question_entropy = abs(positives - negatives)

    # Penalize unknowns separately so they cannot cancel out an imbalance;
    # the weight of 5 is arbitrary and discourages questions with many unknowns.
    question_entropy += unknowns * 5

    return question_entropy
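
For comparison, here is a similar hypothetical sketch for simple_entropy; again the stub counts stand in for the project's real model module and are not taken from the original code.

# Hypothetical usage sketch: StubModel and Question are illustrative stubs only.
from collections import namedtuple

Question = namedtuple('Question', 'id')

class StubModel(object):
    # question_id -> (positives, negatives, unknowns)
    counts = {1: (5, 5, 0), 2: (3, 3, 4), 3: (10, 0, 0)}
    def get_num_positives(self, objects, question_id):
        return self.counts[question_id][0]
    def get_num_negatives(self, objects, question_id):
        return self.counts[question_id][1]
    def get_num_unknowns(self, objects, question_id):
        return self.counts[question_id][2]

model = StubModel()
objects = list(range(10))

for qid in (1, 2, 3):
    print(qid, simple_entropy(objects, Question(id=qid)))
# 1 -> 0   even yes/no split with no unknowns: the lowest (best) score
# 2 -> 20  even split, but four unknowns penalized at a weight of 5 each
# 3 -> 10  all-yes split: maximal imbalance, so a poor question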