def findCombination(word, lstFunc, alphabet, offset, reprs):
    """Search, level by level, for a chain of functions that reaches `reprs`.

    A search tree is grown whose root carries ``space=offset``.  In every
    round ("mutation") each current leaf is expanded once per function in
    `lstFunc`: ``generateSpaceEx(f, leaf.space, alphabet)`` produces a list of
    records, the de-duplicated first elements of those records become the
    new child's ``space`` and the full record list becomes its ``history``.

    As soon as ``contains(r, child.space)`` is truthy for some ``r`` in
    `reprs`, the path from that child up to the root is handed to
    ``getSolution`` and the process exits.

    Parameters
    ----------
    word : value checked against `alphabet` up front; if ``contains(word,
        alphabet)`` is truthy the word is reported with a plain PUSH and
        the process exits.
    lstFunc : iterable of functions tried at every expansion step.
    alphabet : passed through to ``contains`` and ``generateSpaceEx``.
    offset : initial space stored on the root node; also forwarded to
        ``getSolution``.
    reprs : candidate representations of the word that are searched for.

    Notes
    -----
    This function never returns normally: it either terminates the process
    through ``sys.exit()`` or keeps expanding the tree.
    """
    # Local import keeps this block self-contained; sys.exit() is the
    # programmatic equivalent of the interactive exit() helper.
    import sys

    debug("findCombination(%s,%s,%s)" % (word, lstFunc, alphabet))

    spaceTree = Tree()
    spaceTree.add_features(space=offset)

    if contains(word, alphabet):
        info("Alphabet contains Word")
        info("PUSH %s" % word)
        sys.exit()

    mutation = 1
    while True:
        info("Mutation: %d !" % mutation)

        # get_leaves() is evaluated once per round, so children added below
        # are only expanded in the next round.
        for n in spaceTree.get_leaves():
            for f in lstFunc:
                # generate the space reachable from this leaf through f
                space = generateSpaceEx(f, n.space, alphabet)
                tmpSpace = list(set(c[0] for c in space))
                debugListHex(tmpSpace, "SPACE")

                nodeF = n.add_child(name=f)
                nodeF.add_features(space=tmpSpace, history=space)

                # check whether any word representation exists in the space
                for r in reprs:
                    if contains(r, tmpSpace):
                        info("FOUND : %s" % r)
                        lstAncestors = [nodeF]
                        lstAncestors.extend(nodeF.get_ancestors())
                        getSolution(r, offset, lstAncestors)
                        sys.exit()

        mutation += 1
def _packDword(values):
    """Combine the first four items of `values` into one integer, most
    significant item first (assumes each item is a byte value 0..255)."""
    return (values[0] * 0x01000000 + values[1] * 0x00010000 +
            values[2] * 0x00000100 + values[3] * 0x00000001)


def getSolution(reprs, offset, his):
    """Reconstruct and print the operand sequence that turns `offset` into `reprs`.

    Parameters
    ----------
    reprs : indexable of values; one back-tracking tree is built per item
        and the first four items form the expected result.
    offset : indexable of values; its first four items are printed as the
        initial PUSH operand.
    his : list of tree nodes, each exposing a ``history`` attribute whose
        records are indexed as ``(result, alphabet value, previous value,
        function)``.  The last element of `his` is skipped.

    For every position in `reprs` a tree rooted at that value is grown: for
    each node in ``his[:-1]`` every current leaf gets a child for each
    history record whose result equals the leaf's value.  The path from the
    first leaf up to (but excluding) the root is then used as the solution
    for that position.  The collected operands are logged and passed to
    ``testResult`` together with the expected packed value.
    """
    debugListHex(reprs, "reprs:", 2)
    debugListHex(offset, "offset:", 2)

    sol = dict()
    for rg, start in enumerate(reprs):
        tPath = Tree(name=start)
        tPath.add_features(value=start)

        for h in his[:-1]:
            for leaf in tPath.get_leaves():
                target = leaf.value
                for line in h.history:
                    if line[0] == target:
                        child = leaf.add_child(name=line[1])
                        # __name__ is available on Python 2 and 3 alike
                        # (func_name exists only on Python 2).
                        child.add_features(function=line[3].__name__,
                                           value=line[2])

        lf = tPath.get_leaves()[0]
        path = [lf]
        path.extend(lf.get_ancestors()[:-1])  # drop the root itself
        sol[rg] = path

    sol2 = dict()
    for i in sol:
        names = [c.name for c in sol[i]]
        # The "method" list is rebuilt for every position, so after the loop
        # it reflects the last position iterated.
        # NOTE(review): every step records the function of the path's first
        # node rather than the step's own node - confirm this is intended.
        sol2["method"] = []
        for j, name in enumerate(names):
            sol2["method"].append(sol[i][0].function)
            sol2.setdefault(j, []).append(name)

    print(prettyText("Solution:", "red"))
    info("PUSH\t\t0x%02x%02x%02x%02x" % (offset[0], offset[1], offset[2], offset[3]))

    test = [_packDword(offset)]
    for m, method in enumerate(sol2["method"]):
        operand = sol2[m]
        test.append(_packDword(operand))
        info("%s\t\t\t0x%02x%02x%02x%02x" % (method, operand[0], operand[1], operand[2], operand[3]))

    expected = _packDword(reprs)
    info("RESULT\t\t0x%08x" % expected)
    testResult(test, expected)
def build_tree(data, feature_info, sens, expl, output, metric, conf,
               max_depth, min_leaf_size=100, agg_type='avg', max_bins=10,
               subsample_frac=1.0):
    """
    Builds a decision tree guided towards nodes with high bias

    Parameters
    ----------
    data :
        the dataset
    feature_info :
        information about user features
    sens :
        name of the sensitive feature
    expl :
        name of the explanatory feature
    output :
        the target feature
    metric :
        the fairness metric to use
    conf :
        the confidence level
    max_depth :
        maximum depth of the decision-tree
    min_leaf_size :
        minimum size of a leaf
    agg_type :
        aggregation method for children scores
    max_bins :
        maximum number of bins to use when binning continuous features
    subsample_frac :
        forwarded unchanged to SplitParams

    Returns
    -------
    tree :
        the tree built by the algorithm
    """
    logging.info('Building a Guided Decision Tree')
    tree = Tree()

    # Check if there are multiple labeled outputs
    targets = output.names.tolist()
    logging.debug('Targets: %s', targets)

    features = set(data.columns.tolist()) - {sens, expl} - set(targets)
    logging.debug('Contextual Features: %s', features)

    # check the data dimensions
    if metric.dataType == Metric.DATATYPE_CORR:
        dim = (feature_info[expl].arity, 6) if expl else 6
    elif expl:
        # dimensions of the OUTPUT x SENSITIVE contingency table,
        # one table per value of the explanatory feature
        dim = (feature_info[expl].arity, output.arity,
               feature_info[sens].arity)
    else:
        dim = (output.arity, feature_info[sens].arity)

    logging.debug('Data Dimension for Metric: %s', dim)

    # bin the continuous features
    cont_thresholds = find_thresholds(data, features, feature_info, max_bins)

    score_params = ScoreParams(metric, agg_type, conf)
    split_params = SplitParams(targets, sens, expl, dim, feature_info,
                               cont_thresholds, min_leaf_size, subsample_frac)

    # get a measure for the root
    if metric.dataType == Metric.DATATYPE_CT:
        stats = [count_values(data, sens, targets[0], expl, dim)[0]]
    elif metric.dataType == Metric.DATATYPE_CORR:
        stats = [corr_values(data, sens, targets[0], expl, dim)[0]]
    else:
        stats = [data[targets + [sens]]]

    _, root_metric = score(stats, score_params)
    tree.add_features(metric=root_metric[0])

    #
    # Builds up the tree recursively. Selects the best feature to split on,
    # in order to maximize the average bias (mutual information) in all
    # sub-trees.
    def rec_build_tree(node_data, node, pred, split_features, depth,
                       parent_score, pool):
        """
        Recursive tree building. Grows `node` in place; returns nothing.

        Parameters
        ----------
        node_data :
            the data for the current node
        node :
            the tree node to annotate and (possibly) split
        pred :
            the predicate defining the current context
        split_features :
            the features on which a split can occur
        depth :
            the current depth
        parent_score :
            the metric score at the parent
        pool :
            the thread pool
        """
        node.add_features(size=len(node_data))

        # make a new leaf if recursion is stopped
        if depth == max_depth or not split_features:
            return

        logging.debug('looking for splits at pred %s', pred)

        # select the best feature to split on
        split_score, best_feature, threshold, to_drop, child_metrics = \
            select_best_feature(node_data, split_features, split_params,
                                score_params, parent_score, pool)

        # no split found, make a leaf
        if best_feature is None:
            return

        logging.info('splitting on %s (score=%s) with threshold %s at pred %s',
                     best_feature, split_score, threshold, pred)

        # NOTE(review): a threshold equal to 0 is falsy and would be handled
        # by the categorical branch below - confirm select_best_feature
        # never reports 0 for a continuous split.
        if threshold:
            # binary split
            column = node_data[best_feature]
            data_left = node_data[column <= threshold]
            data_right = node_data[column > threshold]

            # predicates for sub-trees
            pred_left = "{} <= {}".format(best_feature, threshold)
            pred_right = "{} > {}".format(best_feature, threshold)

            # add new nodes to the underlying tree structure
            left_child = node.add_child(name=str(pred_left))
            left_child.add_features(feature_type='continuous',
                                    feature=best_feature,
                                    threshold=threshold,
                                    is_left=True,
                                    metric=child_metrics['left'])

            right_child = node.add_child(name=str(pred_right))
            right_child.add_features(feature_type='continuous',
                                     feature=best_feature,
                                     threshold=threshold,
                                     is_left=False,
                                     metric=child_metrics['right'])

            # recursively build the tree; the split feature itself stays
            # available further down, only `to_drop` is removed
            remaining = split_features - set(to_drop)
            rec_build_tree(data_left, left_child, pred + [pred_left],
                           remaining, depth + 1, split_score, pool)
            rec_build_tree(data_right, right_child, pred + [pred_right],
                           remaining, depth + 1, split_score, pool)
        else:
            # categorical split: the split feature is consumed
            remaining = split_features - set(to_drop + [best_feature])

            for val in node_data[best_feature].unique():
                # check if this child was pruned or not
                if val not in child_metrics:
                    continue

                # predicate for the current sub-tree
                new_pred = "{} = {}".format(best_feature, val)

                # add a node to the underlying tree structure
                child = node.add_child(name=str(new_pred))
                child.add_features(feature_type='categorical',
                                   feature=best_feature,
                                   category=val,
                                   metric=child_metrics[val])

                child_data = node_data[node_data[best_feature] == val]

                # recursively build the tree
                rec_build_tree(child_data, child, pred + [new_pred],
                               remaining, depth + 1, split_score, pool)

    #
    # When contextual features are just a few there is
    # no actual benefit out of parallelization. In fact,
    # contention introduces a slight overhead. The larger
    # pool size (max(1, multiprocessing.cpu_count() - 2)) is
    # currently disabled, so a single worker is used in all cases.
    #
    pool_size = 1

    pool = multiprocessing.Pool(pool_size)
    try:
        rec_build_tree(data, tree, [], features, 0, 0, pool)
    finally:
        # release the worker processes even if tree building fails
        pool.close()
        pool.join()

    return tree
# Eigendecomposition of matrizQ: w holds the eigenvalues, v the eigenvectors.
# presumably LA is numpy.linalg and matrizQ an np.matrix (.I and matrix `*`
# are used below) - verify against the imports earlier in the file.
w, v = LA.eig(matrizQ)
print(w)
print(v)
print(np.diag(w))
print(np.exp(np.diag(w)))
print(np.diag(np.exp(w)))
print(v.T.I)
print("aqui")

# NOTE(review): with eigenvectors stored as columns of v, exp(Q) is usually
# written v * diag(exp(w)) * v.I; the expression below is the transpose of
# that product - confirm the orientation is intended.
matrizP = v.T.I * np.diag(np.exp(w)) * v.T
print(sum(matrizP[1, :]))
print(matrizP)

# t = Tree("((a,b),c);")
#print(samplerAC)

# Small demo tree with named internal nodes and an extra `size` feature on
# the root.
t = Tree("((A, B)Internal_1:0.7, (C, D)Internal_2:0.5)root:1.3;", format=1)
t.add_features(size=4)
# Function-call form, consistent with the other print calls in this script
# (the statement form is a syntax error on Python 3).
print(t.get_ascii(attributes=["name", "dist", "size"]))

#tree = Phylo.parse("/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre", "nexus").next()
#tree = Phylo.read("/Users/patricioburchard/Downloads/Cercosaura_FinalPAUP.tre", "nexus")
#tree.rooted = True
#Phylo.draw(tree)
#infile = open('/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.run1.p', 'r')
#t.show()

# NOTE(review): the same path is given to the constructor and to
# read_file(); the second call may be redundant - confirm against the
# NexusReader API.
n = NexusReader(
    '/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre'
)
n.read_file(
    '/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre'
)