Пример #1
0
def iamb(data, alpha=0.05, feature_selection=None, debug=False):
	"""
	IAMB (Incremental Association Markov Blanket) algorithm for
	learning the structure of a Discrete Bayesian Network from data,
	or the Markov blanket of a single variable.

	Arguments
	---------
	*data* : a nested numpy array
		One column per random variable, one row per observation.

	*alpha* : a float
		Significance level (Type I error rate) of the conditional
		independence test. A p-value below *alpha* is treated as
		evidence of dependence.

	*feature_selection* : None or a column index
		If None, learn the Markov blanket of every variable and
		build the full network. Otherwise, the (single) column
		index whose Markov blanket should be returned.

	*debug* : a boolean
		Whether to print progress information.

	Returns
	-------
	*bn* : a BayesNet object (when *feature_selection* is None), or
	*mb* : a list of column indices - the markov blanket of the
		requested node (otherwise)

	Effects
	-------
	None

	Notes
	-----
	Speed Test:
		*** 5 vars, 624 obs ***
			- 196 ms
	"""
	n_rv = data.shape[1]
	Mb = {rv: [] for rv in range(n_rv)}

	if feature_selection is None:
		_T = range(n_rv)
	else:
		assert (not isinstance(feature_selection, list)), 'feature_selection must be only one value'
		_T = [feature_selection]

	# LEARN MARKOV BLANKET
	for T in _T:

		# every variable except the target itself
		V = set(range(n_rv)) - {T}

		# GROWING PHASE
		Mb_change = True
		while Mb_change:
			Mb_change = False
			# find X_max in V-Mb(T) that maximizes
			# mutual information of X,T|Mb(T)
			max_val = -1
			max_x = None
			for X in V - set(Mb[T]):
				cols = (X, T) + tuple(Mb[T])
				mi_val = mi_test(data[:, cols], test=False)
				if mi_val > max_val:
					max_val = mi_val
					max_x = X

			if max_x is None:
				# no candidates left to consider
				break

			# Add X_max only if it is DEPENDENT on T given Mb(T).
			# NOTE(review): relies on mi_test, called without
			# test=False, returning a p-value (dependence <=> p < alpha)
			# - confirm against its definition.
			cols = (max_x, T) + tuple(Mb[T])
			if mi_test(data[:, cols]) < alpha:
				# append the maximizer, not the leftover loop variable
				Mb[T].append(max_x)
				Mb_change = True
				if debug:
					print('Adding %s to MB of %s' % (str(max_x), str(T)))

		# SHRINKING PHASE
		# iterate over a snapshot: removing from the list being
		# iterated would silently skip elements
		for X in list(Mb[T]):
			# if x is independent of t given Mb(T) - {x}, drop it
			cols = (X, T) + tuple(set(Mb[T]) - {X})
			if mi_test(data[:, cols]) < alpha:
				continue  # still dependent -> keep X
			Mb[T].remove(X)
			if debug:
				print('Removing %s from MB of %s' % (str(X), str(T)))

	if feature_selection is None:
		# RESOLVE GRAPH STRUCTURE
		edge_dict = resolve_markov_blanket(Mb, data)
		if debug:
			print('Unoriented edge dict:\n %s' % str(edge_dict))
			print('MB: %s' % str(Mb))
		# ORIENT EDGES
		oriented_edge_dict = orient_edges_gs2(edge_dict, Mb, data, alpha)
		if debug:
			print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

		# CREATE BAYESNET OBJECT
		# map each column index to the sorted unique values it takes
		value_dict = {idx: list(np.unique(col)) for idx, col in enumerate(data.T)}
		bn = BayesNet(oriented_edge_dict, value_dict)

		return bn
	else:
		# _T is a one-element list and cannot be used as a dict key
		return Mb[feature_selection]
Пример #2
0
def gs(data, alpha=0.05, feature_selection=None, debug=False):
    """
    Perform the grow-shrink algorithm over a dataset to learn a
    Bayesian network structure, or the Markov blanket of a
    single variable.

    This algorithm is clearly a good candidate for
    numba JIT compilation...

    STEPS (performed by this function)
    -----
    1. Compute Markov Blanket (grow phase, then shrink phase)
    2. Compute Graph Structure (resolve_markov_blanket)
    3. Orient Edges (orient_edges_MB)
    4. Build the BayesNet object

    Arguments
    ---------
    *data* : a nested numpy array
        Data from which you wish to learn structure. It is passed
        through replace_strings before any test is run.

    *alpha* : a float
        Type I error rate for independence test. A p-value below
        *alpha* is treated as evidence of dependence.

    *feature_selection* : None or a column index
        If None, learn the full network. Otherwise, the (single)
        column index whose Markov blanket should be returned.

    *debug* : a boolean
        Whether to print progress information.

    Returns
    -------
    *bn* : a BayesNet object (when *feature_selection* is None), or
    *mb* : a list of column indices - the markov blanket of the
        requested node (otherwise)

    Effects
    -------
    None

    Notes
    -----

    Speed Test:
        *** 5 variables, 624 observations ***
        - 63.7 ms

    """
    n_rv = data.shape[1]
    data, value_dict = replace_strings(data, return_values=True)

    if feature_selection is None:
        _T = range(n_rv)
    else:
        assert not isinstance(feature_selection, list), "feature_selection must be only one value"
        _T = [feature_selection]

    # STEP 1 : COMPUTE MARKOV BLANKETS
    Mb = {rv: [] for rv in range(n_rv)}

    for X in _T:
        S = []

        # GROW: keep sweeping over the variables until a full sweep
        # adds nothing new to S
        grow_condition = True
        while grow_condition:
            grow_condition = False
            for Y in range(n_rv):
                if X == Y or Y in S:
                    continue
                # if Y is dependent on X given S, add Y to S
                cols = (X, Y) + tuple(S)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent
                    grow_condition = True  # dependent -> continue searching
                    S.append(Y)

        # SHRINK: drop every Y that is independent of X given S-{Y}.
        # S must actually shrink on each removal; re-testing an
        # unchanged S would repeat the same outcome forever.
        shrink_condition = True
        while shrink_condition:
            shrink_condition = False
            for Y in list(S):  # snapshot, since S is mutated below
                others = [Z for Z in S if Z != Y]  # condition on S-{Y}
                cols = (X, Y) + tuple(others)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent -> keep it in
                    continue
                # independent -> leave Y out and sweep again
                S.remove(Y)
                shrink_condition = True

        Mb[X] = S
        if debug:
            print("Markov Blanket for %s : %s" % (X, str(S)))

    if feature_selection is None:
        # STEP 2: COMPUTE GRAPH STRUCTURE
        # i.e. Resolve Markov Blanket
        edge_dict = resolve_markov_blanket(Mb, data)
        if debug:
            print("Unoriented edge dict:\n %s" % str(edge_dict))

        # STEP 3: ORIENT EDGES
        oriented_edge_dict = orient_edges_MB(edge_dict, Mb, data, alpha)
        if debug:
            print("Oriented edge dict:\n %s" % str(oriented_edge_dict))

        # CREATE BAYESNET OBJECT
        bn = BayesNet(oriented_edge_dict, value_dict)

        return bn
    else:
        # _T is a one-element list and cannot be used as a dict key
        return Mb[feature_selection]
Пример #3
0
def gs(data, alpha=0.05, feature_selection=None, debug=False):
    """
    Perform the grow-shrink algorithm over a dataset to learn a
    Bayesian network structure, or the Markov blanket of a
    single variable.

    This algorithm is clearly a good candidate for
    numba JIT compilation...

    STEPS (performed by this function)
    -----
    1. Compute Markov Blanket (grow phase, then shrink phase)
    2. Compute Graph Structure (resolve_markov_blanket)
    3. Orient Edges (orient_edges_MB)
    4. Build the BayesNet object

    Arguments
    ---------
    *data* : a nested numpy array
        Data from which you wish to learn structure. It is passed
        through replace_strings before any test is run.

    *alpha* : a float
        Type I error rate for independence test. A p-value below
        *alpha* is treated as evidence of dependence.

    *feature_selection* : None or a column index
        If None, learn the full network. Otherwise, the (single)
        column index whose Markov blanket should be returned.

    *debug* : a boolean
        Whether to print progress information.

    Returns
    -------
    *bn* : a BayesNet object (when *feature_selection* is None), or
    *mb* : a list of column indices - the markov blanket of the
        requested node (otherwise)

    Effects
    -------
    None

    Notes
    -----

    Speed Test:
        *** 5 variables, 624 observations ***
        - 63.7 ms

    """
    n_rv = data.shape[1]
    data, value_dict = replace_strings(data, return_values=True)

    if feature_selection is None:
        _T = range(n_rv)
    else:
        assert (not isinstance(feature_selection, list)
                ), 'feature_selection must be only one value'
        _T = [feature_selection]

    # STEP 1 : COMPUTE MARKOV BLANKETS
    Mb = {rv: [] for rv in range(n_rv)}

    for X in _T:
        S = []

        # GROW: keep sweeping over the variables until a full sweep
        # adds nothing new to S
        grow_condition = True
        while grow_condition:
            grow_condition = False
            for Y in range(n_rv):
                if X == Y or Y in S:
                    continue
                # if Y is dependent on X given S, add Y to S
                cols = (X, Y) + tuple(S)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent
                    grow_condition = True  # dependent -> continue searching
                    S.append(Y)

        # SHRINK: drop every Y that is independent of X given S-{Y}.
        # S must actually shrink on each removal; re-testing an
        # unchanged S would repeat the same outcome forever.
        shrink_condition = True
        while shrink_condition:
            shrink_condition = False
            for Y in list(S):  # snapshot, since S is mutated below
                others = [Z for Z in S if Z != Y]  # condition on S-{Y}
                cols = (X, Y) + tuple(others)
                pval = mi_test(data[:, cols])
                if pval < alpha:  # dependent -> keep it in
                    continue
                # independent -> leave Y out and sweep again
                S.remove(Y)
                shrink_condition = True

        Mb[X] = S
        if debug:
            print('Markov Blanket for %s : %s' % (X, str(S)))

    if feature_selection is None:
        # STEP 2: COMPUTE GRAPH STRUCTURE
        # i.e. Resolve Markov Blanket
        edge_dict = resolve_markov_blanket(Mb, data)
        if debug:
            print('Unoriented edge dict:\n %s' % str(edge_dict))

        # STEP 3: ORIENT EDGES
        oriented_edge_dict = orient_edges_MB(edge_dict, Mb, data, alpha)
        if debug:
            print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

        # CREATE BAYESNET OBJECT
        bn = BayesNet(oriented_edge_dict, value_dict)

        return bn
    else:
        # _T is a one-element list and cannot be used as a dict key
        return Mb[feature_selection]
Пример #4
0
def iamb(data, alpha=0.05, feature_selection=None, debug=False):
    """
    IAMB (Incremental Association Markov Blanket) algorithm for
    learning the structure of a Discrete Bayesian Network from data,
    or the Markov blanket of a single variable.

    Arguments
    ---------
    *data* : a nested numpy array
        One column per random variable, one row per observation.

    *alpha* : a float
        Significance level (Type I error rate) of the conditional
        independence test. A p-value below *alpha* is treated as
        evidence of dependence.

    *feature_selection* : None or a column index
        If None, learn the Markov blanket of every variable and
        build the full network. Otherwise, the (single) column
        index whose Markov blanket should be returned.

    *debug* : a boolean
        Whether to print progress information.

    Returns
    -------
    *bn* : a BayesNet object (when *feature_selection* is None), or
    *mb* : a list of column indices - the markov blanket of the
        requested node (otherwise)

    Effects
    -------
    None

    Notes
    -----
    Speed Test:
        *** 5 vars, 624 obs ***
            - 196 ms
    """
    n_rv = data.shape[1]
    Mb = {rv: [] for rv in range(n_rv)}

    if feature_selection is None:
        _T = range(n_rv)
    else:
        assert (not isinstance(feature_selection, list)
                ), 'feature_selection must be only one value'
        _T = [feature_selection]

    # LEARN MARKOV BLANKET
    for T in _T:

        # every variable except the target itself
        V = set(range(n_rv)) - {T}

        # GROWING PHASE
        Mb_change = True
        while Mb_change:
            Mb_change = False
            # find X_max in V-Mb(T) that maximizes
            # mutual information of X,T|Mb(T)
            max_val = -1
            max_x = None
            for X in V - set(Mb[T]):
                cols = (X, T) + tuple(Mb[T])
                mi_val = mi_test(data[:, cols], test=False)
                if mi_val > max_val:
                    max_val = mi_val
                    max_x = X

            if max_x is None:
                # no candidates left to consider
                break

            # Add X_max only if it is DEPENDENT on T given Mb(T).
            # NOTE(review): relies on mi_test, called without
            # test=False, returning a p-value (dependence <=> p < alpha)
            # - confirm against its definition.
            cols = (max_x, T) + tuple(Mb[T])
            if mi_test(data[:, cols]) < alpha:
                # append the maximizer, not the leftover loop variable
                Mb[T].append(max_x)
                Mb_change = True
                if debug:
                    print('Adding %s to MB of %s' % (str(max_x), str(T)))

        # SHRINKING PHASE
        # iterate over a snapshot: removing from the list being
        # iterated would silently skip elements
        for X in list(Mb[T]):
            # if x is independent of t given Mb(T) - {x}, drop it
            cols = (X, T) + tuple(set(Mb[T]) - {X})
            if mi_test(data[:, cols]) < alpha:
                continue  # still dependent -> keep X
            Mb[T].remove(X)
            if debug:
                print('Removing %s from MB of %s' % (str(X), str(T)))

    if feature_selection is None:
        # RESOLVE GRAPH STRUCTURE
        edge_dict = resolve_markov_blanket(Mb, data)
        if debug:
            print('Unoriented edge dict:\n %s' % str(edge_dict))
            print('MB: %s' % str(Mb))
        # ORIENT EDGES
        oriented_edge_dict = orient_edges_gs2(edge_dict, Mb, data, alpha)
        if debug:
            print('Oriented edge dict:\n %s' % str(oriented_edge_dict))

        # CREATE BAYESNET OBJECT
        # map each column index to the sorted unique values it takes
        value_dict = {idx: list(np.unique(col)) for idx, col in enumerate(data.T)}
        bn = BayesNet(oriented_edge_dict, value_dict)

        return bn
    else:
        # _T is a one-element list and cannot be used as a dict key
        return Mb[feature_selection]