Пример #1
0
def EntropyDiscretizer(c, accept_strategy=mdlp_accept, min_size=100):
    """
    Repeatedly split ``c`` into intervals and collect the accepted cut points.

    Starting from the full range ``[0, c.shape[0])``, each pending interval is
    skipped when ``ent`` of its slice equals 0. Otherwise ``optimal_cut``
    proposes a cut position ``t`` together with a value ``e``. The cut is kept
    only when it lies strictly more than ``min_size`` positions away from both
    ends of the interval and ``accept_strategy(c, e, t)`` is truthy; both
    resulting halves are then queued for further splitting.

    NOTE(review): the original docstring mentions ``c = c.reindex(f.index)``,
    presumably a preparation step expected from the caller -- confirm.

    c: object exposing ``.shape`` and positional slicing
    accept_strategy: callable invoked as ``accept_strategy(c, e, t)``
    min_size: required margin between a cut and either interval end

    return: list of breaking points, in the order they were accepted
    """
    cuts = []
    pending = [[0, c.shape[0]]]
    while pending:
        start, end = pending.pop()
        if ent(c[start:end]) == 0:
            continue
        t, e = optimal_cut(c, start, end)
        # Reject cuts that sit too close to either edge of the interval.
        if not (t > start + min_size and t < end - min_size):
            continue
        if not accept_strategy(c, e, t):
            continue
        cuts.append(t)
        # Right half is pushed first so the left half is popped next.
        pending.extend(([t, end], [start, t]))
    return cuts
Пример #2
0
def symmetricalUncertainty(x, y):
    """Return twice ``infoGain(x, y)`` divided by ``ent(x) + ent(y)``.

    No guard is applied to the denominator, so the division behaves however
    the returned numeric types behave when both entropies sum to zero.
    """
    gain = infoGain(x, y)
    total_entropy = ent(x) + ent(y)
    return 2 * gain / total_entropy
Пример #3
0
def symmetricalUncertainty(x, y):
	"""Compute ``2 * infoGain(x, y) / (ent(x) + ent(y))``.

	The denominator is used as-is; a zero entropy sum is not special-cased.
	"""
	numerator = 2 * infoGain(x, y)
	denominator = ent(x) + ent(y)
	return numerator / denominator
        'NAT Destination Port', 'Action', 'Bytes', 'Bytes Sent',
        'Bytes Received', 'Packets', 'Elapsed Time (sec)', 'pkts_sent',
        'pkts_received'
    ]

    # removes every row in skip. faster loadtime
    data = pd.read_csv(file, skiprows=skip, header=0, names=col_names)
    return data


# Load the data set via sample().
port_DATA = sample()

# Report the number of loaded rows. The value has to be interpolated with `%`:
# handing it to print() as a second argument would emit the literal "%s".
print("length: %s" % len(port_DATA))

# Some of the values are reading as infinite. Replace with NaN
port_DATA.replace([np.inf, -np.inf], np.nan, inplace=True)

# Drop the Rows with NaN values
port_DATA.dropna(inplace=True)

# Rebind to the result of the binning step; every later stage receives it.
port_DATA = binning.binned(port_DATA)

# The remaining stages are called for their side effects only -- their return
# values are discarded here.
entropy.ent(port_DATA)

dimension.dim(port_DATA)

print("Euclidean Distances: \n")
euclidean.euc(port_DATA)

decisiontree.decision_tree(port_DATA)
Пример #5
0
def slice_ent(s):
    """Return ``ent`` of the value frequencies in ``s`` and the distinct count.

    ``np.bincount`` tallies occurrences of each value in ``s``; the tallies
    are divided by ``s.shape[0]`` to obtain relative frequencies.

    s: array accepted by ``np.bincount`` that exposes ``.shape``

    return: tuple ``(ent(frequencies), number of non-zero frequencies)``
    """
    occurrences = np.bincount(s)
    frequencies = np.true_divide(occurrences, s.shape[0])
    entropy_value = ent(frequencies)
    present = (frequencies != 0).sum()
    return entropy_value, present