def generate(
        active_degree: int,
        passive_degree: int,
        label_count: int,
        actives_all_same: bool,
        passives_all_same: bool,
        flags: ProblemFlags,
        count_limit: int = sys.maxsize,  # TODO: remove the param or use it
        skip_count: int = 0,  # TODO: remove the param or use it
) -> List[P]:
    alphabet = letter_range(label_count)

    # take active_degree labels
    # from a palette of active_label_count
    if flags.is_directed_or_rooted:
        # rooted/directed: order of configs does not matter,
        # except for the first label in each config. Thus,
        # we have an additional product() call below.
        # e.g. degree = 3, labels = 2, gives us:
        # ['AAA', 'AAB', 'ABB', 'BAA', 'BAB', 'BBB']
        actives = ("".join(x) for x in combinations_with_replacement(alphabet, active_degree - 1))
        passives = ("".join(x) for x in combinations_with_replacement(alphabet, passive_degree - 1))
        actives = ("".join(x) for x in product(alphabet, actives))
        passives = ("".join(x) for x in product(alphabet, passives))
    else:
        # unrooted/undirected: order of configs does not matter
        # e.g. degree = 3, labels = 2, gives us:
        # ['AAA', 'AAB', 'ABB', 'BBB']
        actives = ("".join(x) for x in combinations_with_replacement(alphabet, active_degree))
        passives = ("".join(x) for x in combinations_with_replacement(alphabet, passive_degree))

    if actives_all_same:
        actives = (x for x in actives if x[0] * len(x) == x)
    if passives_all_same:
        passives = (x for x in passives if x[0] * len(x) == x)

    active_constraints = (tuple(" ".join(y) for y in x) for x in tqdm(powerset(actives)) if x)
    passive_constraints = (tuple(" ".join(y) for y in x) for x in tqdm(powerset(passives)) if x)
    problem_tuples = ((a, b) for (a, b) in product(active_constraints, passive_constraints))
    return problem_from_constraints(problem_tuples, flags)
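# The snippets in this section all rely on a powerset() helper that is not shown.
# A minimal sketch based on the standard itertools recipe (the actual helpers in
# these projects may differ, e.g. in whether they yield tuples, lists, or sets):
from itertools import chain, combinations

def powerset(iterable):
    """Yield every subset of iterable as a tuple, from () up to the full set."""
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

# Usage: list(powerset([1, 2])) -> [(), (1,), (2,), (1, 2)]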
def computeIISsBruteForce(self):
    """
    DEPRECATED: not an efficient way to compute IISs.

    If all IISs contain at least one free feature, then safe policies exist.

    A brute-force way to find all IISs and return their indices.
    Can be O(2^|unknown features|).
    """
    unknownConsPowerset = powerset(self.unknownCons)
    feasible = {}
    iiss = []

    for cons in unknownConsPowerset:
        # if any subset is already infeasible, no need to check this set.
        # it's definitely infeasible and not an IIS
        if len(cons) > 0 and any(not feasible[subset] for subset in combinations(cons, len(cons) - 1)):
            feasible[cons] = False
            continue

        # find if the lp is feasible by posing cons
        sol = self.findConstrainedOptPi(cons)
        feasible[cons] = sol['feasible']

        if len(cons) == 0 and not feasible[cons]:
            # no IISs in this case. problem infeasible
            return []

        # if it is infeasible and all its subsets are feasible
        # (only need to check the subsets with one less element), then it's an IIS
        if not feasible[cons] and all(feasible[subset] for subset in combinations(cons, len(cons) - 1)):
            iiss.append(cons)

    self.iiss = iiss
def probOfExistanceOfSafePolicies(self, lockedCons, freeCons):
    """
    Compute the probability that at least one safe policy exists.
    This considers the changeabilities of all unknown features.

    lockedCons, freeCons: the sets of locked and free features.
    They might be different from the ones confirmed by querying.
    These are hypothetical ones just to compute the corresponding prob.
    """
    unknownCons = set(self.consIndices) - set(lockedCons) - set(freeCons)
    # \EE[policy exists]
    expect = 0

    allSubsetsOfUnknownCons = powerset(unknownCons)

    for freeSubset in allSubsetsOfUnknownCons:
        # assume now freeSubset is free and unknownCons \ freeSubset is locked
        # compute the prob. that this happens (given lockedCons and freeCons)
        prob = reduce(mul, map(lambda _: self.consProbs[_], freeSubset), 1) * \
               reduce(mul, map(lambda _: 1 - self.consProbs[_], unknownCons - set(freeSubset)), 1)

        # an indicator that represents whether safe policies exist
        safePolicyExist = self.safePolicyExist(freeCons=list(freeCons) + list(freeSubset))

        expect += safePolicyExist * prob

    return expect
def computeIISsBruteForce(self):
    """
    DEPRECATED: not an efficient way to compute IISs.

    If all IISs contain at least one free feature, then safe policies exist.

    A brute-force way to find all IISs and return their indices.
    Can be O(2^|unknown features|).
    """
    unknownConsPowerset = powerset(self.allCons)
    feasible = {}
    iiss = []

    for cons in unknownConsPowerset:
        # if any subset is already infeasible, no need to check this set.
        # it's definitely infeasible and not an IIS
        if len(cons) > 0 and any(not feasible[subset] for subset in combinations(cons, len(cons) - 1)):
            feasible[cons] = False
            continue

        # find if the lp is feasible by posing cons
        sol = self.findConstrainedOptPi(cons)
        feasible[cons] = sol['feasible']

        if len(cons) == 0 and not feasible[cons]:
            # no IISs in this case. problem infeasible
            return []

        # if it is infeasible and all its subsets are feasible
        # (only need to check the subsets with one less element), then it's an IIS
        if not feasible[cons] and all(feasible[subset] for subset in combinations(cons, len(cons) - 1)):
            iiss.append(cons)

    self.iiss = iiss
def getProbOfExistenceOfSafePolicies(self, lockedCons, freeCons):
    """
    Compute the probability that safe policies exist using dominating policies (the best way?)

    lockedCons, freeCons: the sets of locked and free features.
    They might be different from the ones confirmed by querying.
    These are hypothetical ones just to compute the corresponding prob.
    """
    result = 0

    def pf(con):
        if con in lockedCons:
            return 0
        elif con in freeCons:
            return 1
        else:
            return self.consProbs[con]

    assert hasattr(self, 'domPiFeats')

    # two ways to compute the probs: either 2^|relFeats| or 2^|domPis|,
    # so see which one is smaller
    if len(self.relFeats) < len(self.domPiFeats):
        allSubsetsOfRelFeats = powerset(self.relFeats)
        for freeSubset in allSubsetsOfRelFeats:
            if self.safePolicyExist(freeCons=list(freeSubset) + list(self.knownFreeCons)):
                prob = self.probFeatsBeingFree(freeSubset) * \
                       self.probFeatsBeingLocked(set(self.relFeats) - set(freeSubset))
                result += prob
    else:
        # inclusion-exclusion over the relevant features of the dominating policies
        for k in range(1, len(self.domPiFeats) + 1):
            sign = 1 if k % 2 == 1 else -1
            for domPiFeatsSubset in combinations(self.domPiFeats, k):
                domPiFeatsSubsetLists = map(lambda _: list(_), domPiFeatsSubset)
                unionOfFeats = set(sum(domPiFeatsSubsetLists, []))
                result += sign * reduce(mul, map(pf, unionOfFeats), 1)

    return result
def findRelevantFeaturesBruteForce(self):
    """
    A method simply to measure the time needed to compute all dominating policies.
    """
    allConsPowerset = set(powerset(self.unknownCons))

    for subsetsToConsider in allConsPowerset:
        self.findConstrainedOptPi(subsetsToConsider)
def _fifteens(self):
    """Compute the score for fifteens"""
    fifteen_score = 0
    pset = powerset(self.all_five)
    for entry in pset:
        # every distinct subset of cards summing to 15 scores 2 points
        if self._sum_cards(entry) == 15:
            fifteen_score += 2
    return fifteen_score
def knapsack(loot, weight_limit):
    all_combo = powerset(loot)
    best_value = None
    for combo in all_combo:
        combo_weight = sum(item.weight for item in combo)
        combo_value = sum(item.value for item in combo)
        if combo_weight <= weight_limit:
            # compare against None explicitly so a best value of 0
            # is not mistaken for "no value found yet"
            if best_value is None or best_value < combo_value:
                best_value = combo_value
    if not best_value:
        print("knapsack couldn't fit any items")
    return best_value
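# A hypothetical usage of knapsack() above; the Item namedtuple with weight and
# value fields is assumed here purely for illustration. Note the exhaustive
# powerset search makes this O(2^n) in the number of items:
from collections import namedtuple

Item = namedtuple('Item', ['weight', 'value'])

loot = [Item(weight=3, value=10), Item(weight=4, value=14), Item(weight=2, value=7)]
print(knapsack(loot, weight_limit=6))  # Item(4, 14) plus Item(2, 7) fits -> 21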
def _runs(self):
    """Compute the score for a run of three, four, or five cards"""

    # Used for sorting subsets
    def get_card_value(card):
        return card.value

    runs = []
    run_score = 0
    pset = powerset(self.all_five)
    for entry in pset:
        if len(entry) < 3:
            continue
        sorted_entry = sorted(entry, key=get_card_value)
        # count consecutive card values; any gap resets the count to 0
        current_val = sorted_entry[0].value
        count = 1
        for card in sorted_entry[1:]:
            if card.value == (current_val + 1):
                count += 1
                current_val = card.value
            else:
                count = 0
        if count == 0:
            continue
        if count >= 3:
            runs.append(sorted_entry)

    # keep only runs that are not sublists of another run
    runs_final = []
    for run1 in runs:
        sublist = False
        for run2 in runs:
            if self._sublist(run1, run2):
                sublist = True
        if not sublist:
            runs_final.append(run1)

    for run in runs_final:
        run_score += len(run)
    return run_score
def divisors(n):
    """A set of the proper divisors of n for any positive integer n.

    >>> divisors(1)
    {1}
    >>> divisors(17.0)
    {1}
    >>> divisors(4) == {1, 2}
    True
    >>> divisors(49) == {1, 7}
    True
    >>> divisors(120) == {1, 2, 3, 4, 5, 6, 8, 10, 12, 15, 20, 24, 30, 40, 60}
    True
    >>> len(divisors(360))
    23
    >>> len(divisors(2520))
    47
    >>> divisors(0)
    Traceback (most recent call last):
        ...
    ValueError: n must be positive
    >>> divisors(2**0.5)
    Traceback (most recent call last):
        ...
    ValueError: n must be an integer
    """
    from functools import reduce
    from math import floor
    from util import powerset, prime_factorization

    if floor(n) != n:
        raise ValueError("n must be an integer")
    n = floor(n)
    if not n >= 1:
        raise ValueError("n must be positive")

    pf = prime_factorization(n)
    factor_sets = powerset(pf)
    product = lambda factors: reduce(lambda x, y: x * y, factors, 1)
    # products over all sub-multisets of the prime factorization give all divisors
    divisors = {product(factors) for factors in factor_sets}
    if n != 1:
        divisors.discard(n)  # proper divisors exclude n itself
    return divisors
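# divisors() above assumes util.prime_factorization returns the prime factors
# of n *with multiplicity* (so powerset products enumerate every divisor, and
# the set comprehension deduplicates). A minimal sketch under that assumption:
def prime_factorization(n):
    """Prime factors of n with multiplicity, e.g. 12 -> [2, 2, 3]."""
    factors = []
    d = 2
    while d * d <= n:
        while n % d == 0:
            factors.append(d)
            n //= d
        d += 1
    if n > 1:  # whatever remains is itself prime
        factors.append(n)
    return factors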
def choice(self, contracts):
    print "choosing from: ",
    for c in contracts:
        print c.id,
    print ""

    # Generate power set of possible contracts
    allContractSets = util.powerset(contracts)
    maxU = 0
    maxContractSet = []
    for contractSet in allContractSets:
        u = self.utility(self, contractSet)
        if u > maxU:
            maxU = u
            maxContractSet = contractSet
    return maxContractSet
def findRelevantFeaturesAndDomPis(self):
    """
    Incrementally add dominating policies to a set.
    DomPolicies algorithm in the IJCAI paper.
    """
    beta = []  # rules to keep
    dominatingPolicies = {}

    allCons = set()
    allConsPowerset = set(powerset(allCons))
    subsetsConsidered = []

    # iterate until no more dominating policies are found
    while True:
        subsetsToConsider = allConsPowerset.difference(subsetsConsidered)
        if len(subsetsToConsider) == 0:
            break

        # find the subset with the smallest size
        activeCons = min(subsetsToConsider, key=lambda _: len(_))
        #if config.DEBUG: print 'activeCons', activeCons
        subsetsConsidered.append(activeCons)

        skipThisCons = False
        for enf, relax in beta:
            if enf.issubset(activeCons) and len(relax.intersection(activeCons)) == 0:
                # this subset can be ignored
                skipThisCons = True
                break
        if skipThisCons:
            continue

        sol = self.findConstrainedOptPi(activeCons)
        if sol['feasible']:
            x = sol['pi']
            if config.DEBUG:
                printOccSA(x)
                print self.computeValue(x)
            dominatingPolicies[activeCons] = x

            # check violated constraints
            violatedCons = self.findViolatedConstraints(x)
            if config.DEBUG: print 'x violates', violatedCons
        else:
            # infeasible
            violatedCons = ()
            if config.DEBUG: print 'infeasible'

        # beta records that we would not enforce activeCons and relax occupiedFeats in the future
        beta.append((set(activeCons), set(violatedCons)))

        allCons.update(violatedCons)
        allConsPowerset = set(powerset(allCons))

    domPis = []
    for pi in dominatingPolicies.values():
        if pi not in domPis:
            domPis.append(pi)

    if config.DEBUG: print 'rel cons', allCons, 'num of domPis', len(domPis)
    return allCons, domPis
def findRelevantFeaturesBruteForce(self):
    allConsPowerset = set(powerset(self.allCons))

    for subsetsToConsider in allConsPowerset:
        self.findConstrainedOptPi(subsetsToConsider)
def computeOptQueries(self):
    """
    f(\phi_l, \phi_f) =
      0, if safePolicyExist(\phi_f) or self.safePolicyNotExist(\phi_l)
      min_\phi p_f(\phi) f(\phi_l, \phi_f + {\phi}) + (1 - p_f(\phi)) f(\phi_l + {\phi}, \phi_f), o.w.

    Boundary conditions: \phi_l is not a superset of any iis, and \phi_f is not a
    superset of rel feats of any dom pi; otherwise f is 0 for sure
    """
    consPowerset = list(powerset(self.relFeats))

    # free/locked cons that are not supersets of elements on their boundaries
    admissibleFreeCons = []
    admissibleLockedCons = []
    # the set of (lockedCons, freeCons) pairs to evaluate the optimal queries on.
    # it's the cross product of the two sets above, excluding free and locked cons that share elements
    admissibleCons = []

    for lockedCons in consPowerset:
        if self.safePolicyNotExist(lockedCons=lockedCons):
            if not any(set(lockedCons).issuperset(lockedB) for lockedB in self.lockedBoundary):
                self.lockedBoundary.append(lockedCons)
        else:
            admissibleLockedCons.append(lockedCons)

    for freeCons in consPowerset:
        if self.safePolicyExist(freeCons=freeCons):
            if not any(set(freeCons).issuperset(freeB) for freeB in self.freeBoundary):
                self.freeBoundary.append(freeCons)
        else:
            admissibleFreeCons.append(freeCons)

    if config.VERBOSE:
        print 'locked', self.lockedBoundary
        print 'free', self.freeBoundary

    for lockedCons in admissibleLockedCons:
        for freeCons in admissibleFreeCons:
            # any cons should not be known to be both free and locked
            if set(lockedCons).isdisjoint(set(freeCons)):
                admissibleCons.append((lockedCons, freeCons))

    readyToEvaluate = lambda l, f: all(self.getQueryAndValue(l, set(f).union({con})) != None
                                       and self.getQueryAndValue(set(l).union({con}), f) != None
                                       for con in set(self.relFeats) - set(l) - set(f))

    # keep the sets of cons that are ready to evaluate in the next iteration
    readyToEvalSet = []
    for (lockedCons, freeCons) in admissibleCons:
        if readyToEvaluate(lockedCons, freeCons):
            readyToEvalSet.append((lockedCons, freeCons))

    # keep filling out the values of optQs within the boundary as they become ready
    while len(readyToEvalSet) > 0:
        if config.VERBOSE: print len(readyToEvalSet), 'need to be evaluated'

        (lockedCons, freeCons) = readyToEvalSet.pop()
        unknownCons = set(self.relFeats) - set(lockedCons) - set(freeCons)

        minNums = [(con,
                    self.consProbs[con] * self.getQueryAndValue(lockedCons, set(freeCons).union({con}))[1]
                    + (1 - self.consProbs[con]) * self.getQueryAndValue(set(lockedCons).union({con}), freeCons)[1]
                    + 1)  # count con in
                   for con in unknownCons]

        # pick the tuple that has the minimum obj value after querying
        self.setQueryAndValue(lockedCons, freeCons, min(minNums, key=lambda _: _[1]))

        # add neighbors that are ready to evaluate to readyToEvalSet
        readyToEvalSet += filter(lambda (l, f): self.getQueryAndValue(l, f) == None and readyToEvaluate(l, f),
                                 [(set(lockedCons) - {cons}, freeCons) for cons in lockedCons] +
                                 [(lockedCons, set(freeCons) - {cons}) for cons in freeCons])
def computeOptimalQuery(self, knownLockedCons, knownFreeCons, unknownCons, psi):
    """
    Recursively compute the optimal query; return the value after the query.
    """
    # the key used for optQueryAndValueDict
    # use frozenset here because the order of features doesn't matter
    key = (frozenset(knownLockedCons), frozenset(knownFreeCons), frozenset(unknownCons), tuple(psi))

    if key in self.optQueryAndValueDict.keys():
        return self.optQueryAndValueDict[key]

    rewardSupports = self.computeConsistentRewardIndices(psi)
    self.imaginedMDP.updatePsi(psi)

    # compute the current safe policy
    if key in self.currentOptPiValueDict.keys():
        currentSafelyOptValue = self.currentOptPiValueDict[key]
    else:
        currentSafelyOptValue = self.findConstrainedOptPi(
            activeCons=list(unknownCons) + list(knownLockedCons),
            addKnownLockedCons=False, mdp=self.imaginedMDP)['obj']

    # feature queries
    if len(unknownCons) > 0:
        consQueryValues = {
            ('F', con):
                self.consProbs[con] * self.computeOptimalQuery(
                    knownLockedCons, knownFreeCons + [con], set(unknownCons) - {con}, psi)[1]
                + (1 - self.consProbs[con]) * self.computeOptimalQuery(
                    knownLockedCons + [con], knownFreeCons, set(unknownCons) - {con}, psi)[1]
                - self.costOfQuery
            for con in unknownCons
        }
    else:
        consQueryValues = {}

    # reward queries
    psiOfSet = lambda rSet: sum(psi[_] for _ in rSet)
    if len(rewardSupports) > 1:
        rewardQueryValues = {
            ('R', rSet):
                psiOfSet(rSet) * self.computeOptimalQuery(
                    knownLockedCons, knownFreeCons, unknownCons,
                    computePosteriorBelief(psi, consistentRewards=rSet))[1]
                + (1 - psiOfSet(rSet)) * self.computeOptimalQuery(
                    knownLockedCons, knownFreeCons, unknownCons,
                    computePosteriorBelief(psi, inconsistentRewards=rSet))[1]
                - self.costOfQuery
            for rSet in powerset(rewardSupports, minimum=1, maximum=len(rewardSupports) - 1)
        }
    else:
        rewardQueryValues = {}

    queryAndValues = consQueryValues.copy()
    queryAndValues.update(rewardQueryValues)

    # also, there's an option to not pose a query
    queryAndValues[None] = currentSafelyOptValue

    optQueryAndValue = max(queryAndValues.items(), key=lambda _: _[1])

    self.optQueryAndValueDict[key] = optQueryAndValue
    return optQueryAndValue
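# computeOptimalQuery() above and findDomPi() below call powerset() with
# minimum/maximum size bounds. A sketch of such a bounded variant, with the
# signature inferred from the call sites (the project's own helper may differ):
from itertools import chain, combinations

def powerset(iterable, minimum=0, maximum=None):
    """Yield subsets whose sizes lie in [minimum, maximum], inclusive."""
    s = list(iterable)
    if maximum is None:
        maximum = len(s)
    return chain.from_iterable(combinations(s, r) for r in range(minimum, maximum + 1))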
def findDomPi(self):
    """
    (Re)compute all dominating policies given reward and safety uncertainty,
    and then sample one. Stored in self.dompis = [(dompi, weighted_prob)]
    """
    domPisData = []
    allDomPis = []

    priorPi = self.computeCurrentSafelyOptPi()
    consistentRewardIndices = self.computeConsistentRewardIndices(self.mdp.psi)

    for rIndices in powerset(consistentRewardIndices, minimum=1, maximum=self.sizeOfRewards):
        rewardPositiveMDP = copy.deepcopy(self.mdp)
        rewardPositiveMDP.updatePsi(computePosteriorBelief(self.mdp.psi, consistentRewards=rIndices))

        sumOfPsi = sum(self.mdp.psi[_] for _ in rIndices)

        rewardPositiveConsAgent = ConsQueryAgent(
            rewardPositiveMDP, self.consStates, self.goalCons, self.consProbs,
            knownFreeCons=self.knownFreeCons, knownLockedCons=self.knownLockedCons)
        _, domPis = rewardPositiveConsAgent.findRelevantFeaturesAndDomPis()

        for domPi in domPis:
            relFeats = rewardPositiveConsAgent.findViolatedConstraints(domPi)

            # we are going to query about rIndices and relFeats;
            # we regard them as batch queries and compute the possible responses
            safeProb = numpy.prod([self.consProbs[feat] for feat in relFeats])
            rPositiveValue = rewardPositiveConsAgent.computeValue(domPi)

            # priorPi is feasible under relFeats since priorPi is safer (before querying)
            priorValue = rewardPositiveConsAgent.computeValue(priorPi)

            # 1 <= len(rIndices) <= sizeOfRewards
            rewardQueryNeeded = (len(rIndices) < len(consistentRewardIndices))

            # at least len(relFeats) feature queries and 1 reward-set query are needed
            weightedValue = safeProb * sumOfPsi * (
                rPositiveValue - priorValue - self.costOfQuery * (len(relFeats) + rewardQueryNeeded))

            if domPi not in allDomPis:
                allDomPis.append(domPi)

            if weightedValue > 0:
                # only add dom pi info when it's beneficial to query about this
                domPisData.append(self.DomPiData(
                    pi=domPi, weightedValue=weightedValue,
                    optimizedRewards=rIndices, violatedCons=relFeats))

    if self.domPiNum is None:
        self.domPiNum = len(allDomPis)

    if len(domPisData) > 0:
        self.objectDomPiData = max(domPisData, key=lambda datum: datum.weightedValue)
    else:
        self.objectDomPiData = None

    if config.VERBOSE: print 'chosen dom pi', self.objectDomPiData
def _satisfactory_placement_generator(self):
    # generates all satisfactory mine placements.
    # note the use of powerset, a function (not method) defined in this
    # module using the recipe from the itertools package
    return filter(self._is_satisfactory_placement, powerset(self.perimiter))
def best(patterns, extended):
    # Enumerate daytimes
    dts = collections.defaultdict(set)
    all = []
    for p in patterns:
        all.extend(p[0].each())
    for p in all:
        for dt in p.getDayTimesRaw():
            dts[dt.data()].add(p)

    # For each daytime, iterate patterns to find termweeks
    dt_tw = {}
    dt_tw_sz = {}
    for (dt, ps) in dts.iteritems():
        tws = collections.defaultdict(set)
        for p in ps:
            for (term, week) in p.getTermWeeks().each():
                tws[term].add(week)
        dt_tw[dt] = tws
        dt_tw_sz[dt] = reduce(lambda tot, item: tot + len(item), tws.itervalues(), 0)

    # restrict to at most max_trials (longest)
    dt_use = set()
    dt_candidates = dt_tw.keys()
    for i in range(0, max_trials):
        if len(dt_candidates) == 0:
            break
        use = max(dt_candidates, key=lambda k: dt_tw_sz[k])
        dt_candidates.remove(use)
        dt_use.add(use)

    # find longest range of each, using 1-8,9-16,17-24 type ranges to allow term overlap
    dt_longest = {}
    for dt in dt_use:
        # build termy week numbers (1-24)
        week_nums = set()
        for (term, weeks) in dt_tw[dt].iteritems():
            for week in filter(lambda x: x > 0 and x < 9, weeks):
                week_nums.add(term * 8 + week)
        ranges = sorted(util.ranges(week_nums), key=lambda x: x[1], reverse=True)
        if len(ranges) == 0:
            dt_longest[dt] = set()
        else:
            dt_longest[dt] = set(range(ranges[0][0], ranges[0][0] + ranges[0][1]))

    # permute through including and excluding date ranges to see which gives best coverage (EXPONENTIAL!)
    best_score = None
    best = None
    for dts in util.powerset(dt_use):
        if len(dts) == 0:
            continue
        all = set(range(1, 25))
        for dt in dts:
            all &= dt_longest[dt]
        score = len(all) * len(dts)
        if best_score == None or score > best_score:
            best_score = score
            best = dts

    # Generate pattern
    if best is None:
        logger.error("No common in %s" % all)
        return None
    p = patternatom.PatternAtom(False)
    for b in best:
        p.addDayTimeRange(b[0], b[1][0], b[1][1], b[2][0], b[2][1])
    p.setAllYear()

    # Extend to include out-of-term dates, where required
    if extended:
        for q in patterns:
            for qa in q[0].blast():
                p.expand_back_to(qa)
                p.expand_forward_to(qa)

    return p
def execute(args):
    print 'Starting the artificial neural network'
    if len(args) < 2:
        usage()
        sys.exit()

    ###########################################################################
    # Data
    #   names   feature labels
    #   y       shuffled names
    #   x       features that correspond to shuffled names
    names, y, x = parse(args[1])
    x = clean(names, x)
    usePowerset = args[0]

    # Build features to include in test
    features = args[2:]
    if len(features) == 0:
        features = names
    # print 'Selected features:', features

    # Build all subsets of features, if requested
    if usePowerset.lower() == 'true':
        combos = powerset(features)
    else:
        combos = [features]

    # map from feature set, to map of correct counts for each person
    feature_performance = {}

    highest_correct = 0
    best_combo = {}
    for c in combos:
        if len(c) == 0:
            continue
        print 'Attempting feature set:', c

        x_selected = selectFeatures(copy.copy(names), c, x)

        # Split into testing and training data
        x_train, x_test, y_train, y_test = train_test_split(x_selected, y, test_size=0.2, random_state=0)

        #######################################################################
        # Models
        logistic = linear_model.LogisticRegression(C=L_REGULARIZATION)
        rbm = BernoulliRBM(random_state=0, verbose=True, learning_rate=N_LEARNING_RATE,
                           n_iter=N_ITER, n_components=N_COMPONENTS)

        # Note: attempted StandardScaler, MinMaxScaler, MaxAbsScaler, without strong results.
        # Not needed, since data is scaled to the [0-1] range by clean()
        classifier = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])

        #######################################################################
        # Training
        print 'Training the classifier...'
        # Training RBM-Logistic Pipeline
        classifier.fit(x_train, y_train)

        correct = 0
        label_counts = defaultdict(int)
        for i in range(len(x_test)):
            test = x_test[i]
            if len(test) == 1:
                test = test.reshape(-1, 1)
            else:
                test = [test]
            predicted = classifier.predict(test)
            if predicted == y_test[i]:
                correct += 1
                label_counts[predicted[0]] += 1
        if correct >= highest_correct:
            highest_correct = correct
            best_combo = c
        feature_performance[str(c)] = {'predictions': label_counts, 'expected': Counter(y_test)}

    ###########################################################################
    # Evaluation
    # evaluate(classifier, x_test, y_test)
    summary = feature_performance[str(best_combo)]
    print 'Accuracy:\t\t\t', highest_correct, 'correct gives', \
        (highest_correct * 1.0 / len(y_test)), 'compared to guessing', (1.0 / len(summary['expected']))
    print 'Best feature set:\t\t', best_combo
    print 'Identified %d out of %d labels' % (len(summary['predictions']), len(summary['expected']))
    for p in summary['predictions']:
        pred = summary['predictions'][p]
        tot = summary['expected'][p]
        print '\t %s \t\t %d\t of %d \t (%f)' % (p, pred, tot, pred * 1.0 / tot)
def _gcjut_rec(loop, t, conj_existing_states=False):
    # Base case
    if type(t) == Var or type(t) == Const:
        return [Join(loop, t)]

    out = []

    # Recursively call the function on subterms
    joins = [_gcjut_rec(loop, st) for st in t.terms]

    for j_comb in product(*joins):
        # For this particular combination of joins, obtain a merged join
        merged_join = merge(loop, t.op, j_comb)
        vprint(P_JOIN_GEN, "Join: merged these joins:")
        for join in j_comb:
            vprint(P_JOIN_GEN, "Join:", join)
        out.append(merged_join)

        # Case when merged join is not a new auxiliary
        vprint(P_JOIN_GEN, "Join: candidate join (merged) =\n", merged_join)
        if not merged_join.term.state_free("SV"):
            continue

        # Find all constants and obtain a mapping to their locations
        const_indv = _get_const_indv(merged_join.term)
        if not const_indv:
            return out

        for const in const_indv.keys():
            vprint(P_JOIN_GEN,
                   "Join: const %s appears in locations %s within %s)"
                   % (str(const), str(const_indv[const]), str(merged_join.term)))

            for ind_set in powerset(const_indv[const]):
                if not ind_set:
                    continue

                rem_set = const_indv[const][:]
                auxjn = Join(merged_join.loop, merged_join.term)
                k = auxjn.loop.get_num_states()

                # Conjecture that this particular choice of indices corresponds
                # to locations of an auxiliary state variable
                for ind in ind_set:
                    auxjn.term.set_term_at(ind, Var("RSV", "s", k + 1))
                    rem_set.remove(ind)

                # Unfold right variables in the term to obtain a definition for the auxiliary
                auxterm = auxjn.term.rename("RSV", "SV").apply_subst(
                    merged_join.loop.get_full_state_subst())

                # For all remaining indices, conjecture that some of them point to
                # existing state variables (if conj_existing_states is True)
                for state_assgn in (product(*[list(range(loop.get_num_states() + 1))
                                              for _ in range(len(rem_set))])
                                    if conj_existing_states else [[0] * len(rem_set)]):
                    auxjn_v = deepcopy(auxjn)
                    auxterm_v = deepcopy(auxterm)

                    for i in range(len(rem_set)):
                        if state_assgn[i] != 0:
                            auxterm_v.set_term_at(rem_set[i], Var("SV", "s", state_assgn[i]))

                    # Add the auxiliary variable and set the join to be the auxiliary.
                    # Note: the auxiliary variable could already exist among the states,
                    # in which case r is an index to the existing state
                    r = auxjn_v.loop.add_state(const, auxterm_v, k)
                    auxjn_v.term = Var("RSV", "s", r + 1)
                    out.append(auxjn_v)

                    vprint(P_JOIN_GEN, "Join: new auxiliary variable:")
                    vprint(P_JOIN_GEN, "Join: %s = %s" % (str(auxjn_v.term), str(auxterm_v)))
                    vprint(P_JOIN_GEN, "Join: candidate join (with auxiliaries) =\n", str(auxjn_v))

    return out
def computeOptQueries(self):
    """
    f(\phi_l, \phi_f) =
      0, if safePolicyExist(\phi_f) or self.safePolicyNotExist(\phi_l)
      min_\phi p_f(\phi) f(\phi_l, \phi_f + {\phi}) + (1 - p_f(\phi)) f(\phi_l + {\phi}, \phi_f), o.w.

    Boundary conditions: \phi_l is not a superset of any iis, and \phi_f is not a
    superset of rel feats of any dom pi; otherwise f is 0 for sure
    """
    consPowerset = list(powerset(self.relFeats))

    # free/locked cons that are not supersets of elements on their boundaries.
    # that is, we can't determine if a safe policy exists when these elements are known to be locked/free
    admissibleFreeCons = []
    admissibleLockedCons = []
    # the set of (lockedCons, freeCons) pairs to evaluate the optimal queries on.
    # it's the cross product of the two sets above, excluding free and locked cons that share elements
    admissibleCons = []

    for lockedCons in consPowerset:
        if self.safePolicyNotExist(lockedCons=lockedCons):
            # make sure no element in the boundary is a superset of another element
            if not any(set(lockedCons).issuperset(lockedB) for lockedB in self.lockedBoundary):
                self.lockedBoundary.append(lockedCons)
        else:
            admissibleLockedCons.append(lockedCons)

    for freeCons in consPowerset:
        if self.safePolicyExist(freeCons=freeCons):
            # similarly
            if not any(set(freeCons).issuperset(freeB) for freeB in self.freeBoundary):
                self.freeBoundary.append(freeCons)
        else:
            admissibleFreeCons.append(freeCons)

    if config.DEBUG:
        print 'locked', self.lockedBoundary
        print 'free', self.freeBoundary

    for lockedCons in admissibleLockedCons:
        for freeCons in admissibleFreeCons:
            # any cons should not be known to be both free and locked
            if set(lockedCons).isdisjoint(set(freeCons)):
                admissibleCons.append((lockedCons, freeCons))

    # make sure all terms on the RHS (of def of f above) are evaluated
    readyToEvaluate = lambda l, f: all(self.getQueryAndValue(l, set(f).union({con})) != None
                                       and self.getQueryAndValue(set(l).union({con}), f) != None
                                       for con in set(self.relFeats) - set(l) - set(f))

    # keep the sets of cons that are ready to evaluate in the next iteration
    readyToEvalSet = []
    for (lockedCons, freeCons) in admissibleCons:
        if readyToEvaluate(lockedCons, freeCons):
            readyToEvalSet.append((lockedCons, freeCons))

    # keep filling out the values of optQs within the boundary as they become ready
    while len(readyToEvalSet) > 0:
        if config.DEBUG: print len(readyToEvalSet), 'need to be evaluated'

        (lockedCons, freeCons) = readyToEvalSet.pop()
        unknownCons = set(self.relFeats) - set(lockedCons) - set(freeCons)

        # evaluate all candidate cons and compute their minimum numbers of queries
        minNums = [(con,
                    self.consProbs[con] * self.getQueryAndValue(lockedCons, set(freeCons).union({con}))[1]
                    + (1 - self.consProbs[con]) * self.getQueryAndValue(set(lockedCons).union({con}), freeCons)[1]
                    + 1)  # count con in
                   for con in unknownCons]

        # pick the tuple that has the minimum obj value after querying
        self.setQueryAndValue(lockedCons, freeCons, min(minNums, key=lambda _: _[1]))

        # add neighbors that are ready to evaluate to readyToEvalSet
        readyToEvalSet += filter(lambda (l, f): self.getQueryAndValue(l, f) == None and readyToEvaluate(l, f),
                                 [(set(lockedCons) - {cons}, freeCons) for cons in lockedCons] +
                                 [(lockedCons, set(freeCons) - {cons}) for cons in freeCons])
def findRelevantFeaturesBruteForce(self):
    allConsPowerset = set(powerset(self.unknownCons))

    for subsetsToConsider in allConsPowerset:
        self.findConstrainedOptPi(subsetsToConsider)
def findRelevantFeaturesAndDomPis(self):
    """
    Incrementally add dominating policies to a set.
    DomPolicies algorithm in the IJCAI paper.

    earlyStop: stop within this time and return whatever dom pis have been found
    """
    beta = []  # rules to keep
    dominatingPolicies = {}

    allCons = set()
    allConsPowerset = set(powerset(allCons))
    subsetsConsidered = []

    if config.earlyStop is None:
        # never stop before finding all dom pis
        terminateCond = lambda: False
    else:
        startTime = time.time()
        terminateCond = lambda: time.time() - startTime >= config.earlyStop

    # iterate until no more dominating policies are found
    while not terminateCond():
        subsetsToConsider = allConsPowerset.difference(subsetsConsidered)
        if len(subsetsToConsider) == 0:
            break

        # find the subset with the smallest size
        activeCons = min(subsetsToConsider, key=lambda _: len(_))
        if config.DEBUG: print 'activeCons', activeCons
        subsetsConsidered.append(activeCons)

        skipThisCons = False
        for enf, relax in beta:
            if enf.issubset(activeCons) and len(relax.intersection(activeCons)) == 0:
                # this subset can be ignored
                skipThisCons = True
                if config.DEBUG: print 'dominated'
                break
        if skipThisCons:
            continue

        # it will enforce activeCons and known locked features (inside)
        sol = self.findConstrainedOptPi(activeCons)
        if sol['feasible']:
            x = sol['pi']
            if config.DEBUG:
                printOccSA(x)
                print self.computeValue(x)
            dominatingPolicies[activeCons] = x

            # check violated constraints
            violatedCons = self.findViolatedConstraints(x)
            if config.DEBUG: print 'this policy violates', violatedCons
        else:
            # infeasible
            violatedCons = ()
            if config.DEBUG: print 'infeasible'

        # beta records that we would not enforce activeCons and relax occupiedFeats in the future
        beta.append((set(activeCons), set(violatedCons)))

        allCons.update(violatedCons)
        allConsPowerset = set(powerset(allCons))

    domPis = []
    for pi in dominatingPolicies.values():
        if pi not in domPis:
            domPis.append(pi)

    # make sure returned values are lists
    allCons = list(allCons)
    if config.DEBUG: print 'rel cons', allCons, 'num of domPis', len(domPis)
    return allCons, domPis
    return raii_tmpdir()


def readdir_inode(dir):
    cmd = base_cmdline + [pjoin(basename, 'test', 'readdir_inode'), dir]
    with subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          universal_newlines=True) as proc:
        lines = proc.communicate()[0].splitlines()
    lines.sort()
    return lines


@pytest.mark.parametrize(
    "cmdline_builder",
    (invoke_directly, invoke_mount_fuse, invoke_mount_fuse_drop_privileges))
@pytest.mark.parametrize("options", powerset(options))
@pytest.mark.parametrize("name", ('hello', 'hello_ll'))
def test_hello(tmpdir, name, options, cmdline_builder, output_checker):
    mnt_dir = str(tmpdir)
    mount_process = subprocess.Popen(
        cmdline_builder(mnt_dir, name, options),
        stdout=output_checker.fd, stderr=output_checker.fd)
    try:
        wait_for_mount(mount_process, mnt_dir)
        assert os.listdir(mnt_dir) == ['hello']

        filename = pjoin(mnt_dir, 'hello')
        with open(filename, 'r') as fh:
            assert fh.read() == 'Hello World!\n'
        with pytest.raises(IOError) as exc_info:
            open(filename, 'r+')
        assert exc_info.value.errno == errno.EACCES
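# Note: powerset(options) above feeds pytest.mark.parametrize, so test_hello is
# run once per subset of the module-level mount options list (2^len(options)
# cases per cmdline builder and example name). For instance, if options were
# ['clone_fd'] (a hypothetical value), the generated cases would be () and
# ('clone_fd',).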