def compute_info_gain(df, feature, split, y):
    """Return the information gain of splitting ``df`` on ``feature <= split``.

    The rows are partitioned into a "left" group (``feature <= split``) and a
    "right" group (everything else, including rows whose feature value does
    not compare as ``<= split``).  The gain is the parent entropy minus the
    size-weighted average of the two child entropies::

        gain = H(parent) - (|left| / |parent|) * H(left)
                         - (|right| / |parent|) * H(right)

    Entropy is delegated to ``mystats.binary_entropy(frame, y)``.
    NOTE(review): this assumes ``binary_entropy`` returns the (unweighted)
    entropy of label column ``y`` within ``frame`` -- confirm against mystats.

    Args:
        df: DataFrame-like object holding at least the ``feature`` and ``y``
            columns.
        feature: Name of the column to split on.
        split: Threshold; rows with ``feature <= split`` go to the left group.
        y: Name of the label column passed through to ``binary_entropy``.

    Returns:
        The information gain of the split, or ``0.0`` when ``df`` has no rows.
    """
    subset = df[[feature, y]]
    total = len(subset)
    if total == 0:
        # No rows means there is nothing to split and nothing to gain.
        return 0.0

    goes_left = subset[feature] <= split
    left = subset[goes_left]
    right = subset[~goes_left]

    children_entropy = 0.0
    for child in (left, right):
        # An empty partition carries zero weight; skipping it also avoids
        # asking the entropy helper to evaluate an empty frame.
        if len(child) == 0:
            continue
        # float() keeps this a true division under Python 2 as well.
        weight = float(len(child)) / total
        children_entropy += weight * mystats.binary_entropy(child, y)

    return mystats.binary_entropy(subset, y) - children_entropy