def chi2_dir(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    cnt = count(zip(effect, unknown))
    #print cnt
    chi_indep = chi2_contingency(cnt)[1]
    p_unknown_given_effect = [ float(cnt[0][1]) / sum(cnt[0]), float(cnt[1][1]) / sum(cnt[1]) ]
    #print 'p(bact|cd)=%s' % p_unknown_given_effect
    exp=[[0,0],[0,0]]
    for c in range(2):
        for e in range(2):
            for u in range(2):
                exp[c][u] += (n * p_of_val(p_cause, c) * p_of_val(p_effect_given_cause[c], e) * p_of_val(p_unknown_given_effect[e], u))
    cnt = count(zip(cause, unknown))
    #print "obs=%s" % cnt
    #print 'cnt=%s' % cnt
    #print 'expected if cd->bact=%s' % exp
    chi_rev = chisquare(cnt, exp, axis=None, ddof=2)
    chi_fwd = chi2_contingency(cnt)
    #print 'expected if bact->cd=%s' % chi_fwd[3]
    bayes_factor = chi2.pdf(chi_fwd[0],1) / chi2.pdf(chi_rev.statistic,1)
    return struct(reject_indep=chi_indep, bayes_fwd_rev=bayes_factor, reject_fwd=chi_fwd[1], reject_rev=chi_rev.pvalue)
def montecarlo(cause, effect, unknown, n, *ignore):
    cnt_cause = count(zip(cause))
    cnt_unknown = count(zip(unknown))
    cnt_cause_effect = count(zip(cause, effect))
    cnt_effect_unknown = count(zip(effect, unknown))
    sumarr(cnt_cause_effect, 0.1) # make beta dist work with zeros
    sumarr(cnt_cause, 0.1)
    sumarr(cnt_unknown, 0.1)
    sumarr(cnt_effect_unknown, 0.1)
    cnt_cause_unknown = count(zip(cause, unknown))
    rounds = 500
    p_overall = struct(cause_unknown_chain=[[0,0],[0,0]], cause_unknown_collide=[[0,0],[0,0]])
    for i in range(rounds):
        p=struct()
        p.cause = 1-beta(*cnt_cause)
        p.unknown = 1-beta(*cnt_unknown)
        p.effect_given_cause = [1-beta(*cnts) for cnts in cnt_cause_effect]
        p.unknown_given_effect = [1-beta(*cnts) for cnts in cnt_effect_unknown]
        p = get_joints_by_model(p)
        acclarr(p_overall.cause_unknown_chain, p.cause_unknown_chain)
        acclarr(p_overall.cause_unknown_collide, p.cause_unknown_collide)
    mularr(p_overall.cause_unknown_chain, 1.0/rounds)
    mularr(p_overall.cause_unknown_collide, 1.0/rounds)
    try:
        bayes_factor = get_factor(p_overall, cnt_cause_unknown)
    except ValueError:
        print '==ValueError=='
        print p_overall.__dict__
        raise ValueError()
    return struct(bayes_fwd_rev=bayes_factor)
def makegraph(items):
    graph = networkx.Graph()
    weights = {}
    n2i = {}
    for item in items:
        for link in extractd.getmessages(item):
            u, v = link[0], link[1]
            if u == v:
                continue
            uid = utils.getid(n2i, u)
            vid = utils.getid(n2i, v)
            graph.add_edge(uid, vid)
            utils.count(weights, (uid, vid))
            utils.count(weights, (vid, uid))
    weighted_edges = {}
    for e in graph.edges():
        w = weights[(e[0], e[1])] if weights[(e[0], e[1])] <= weights[(e[1], e[0])] else weights[(e[1], e[0])]
        weighted_edges[ (e[0], e[1]) ] = w
    edges = utils.filter_gt(weighted_edges, 2)
    bigraph = networkx.Graph()
    for e in edges:
        bigraph.add_edge(e[0], e[1], weight = edges[e])
    return bigraph, n2i
def play_round(grid):
    next_grid = copy.deepcopy(grid)
    changes = 0
    for y in range(len(grid)):
        for x in range(len(grid[0])):
            position = grid[y][x]
            # L = empty
            # # = occupied
            # . = floor
            if position == "L":
                adjacent = get_adjacent(grid, x, y)
                occupied_adjacent = count("#", adjacent)
                if occupied_adjacent == 0:
                    next_grid[y][x] = "#"
                    changes += 1
            elif position == "#":
                adjacent = get_adjacent(grid, x, y)
                occupied_adjacent = count("#", adjacent)
                if occupied_adjacent >= 4:
                    next_grid[y][x] = "L"
                    changes += 1
    return next_grid, changes
def mle(a,b,cut,verbose=False):
    cntall = count(zip(a,b))
    cntcut = count(zip(cut,a,b))
    sumarr(cntall, 0.1)
    sumarr(cntcut, 0.1)
    p_b_given_a = [float(x[1])/sum(x) for x in cntall]
    p_a_given_b = [float(x[1])/sum(x) for x in zip(*cntall)]
    logbfs=[0,0]
    for cutv in range(2):
        cnt = cntcut[cutv]
        tot = sum([sum(l) for l in cnt])
        p_a = float( sum(cnt[1]) ) / tot
        p_b = float( cnt[0][1] + cnt[1][1] ) / tot
        p_ab_given_cuts = [[0,0],[0,0]]
        p_ab_given_toucha = [[0,0],[0,0]]
        p_ab_given_touchb = [[0,0],[0,0]]
        for av in range(2):
            for bv in range(2):
                p_ab_given_cuts[av][bv] = p_of_val(p_a, av) * p_of_val(p_b, bv)
                p_ab_given_toucha[av][bv] = ( p_of_val(p_a, av) * p_of_val(p_b_given_a[av], bv) )
                p_ab_given_touchb[av][bv] = ( p_of_val(p_b, bv) * p_of_val(p_a_given_b[bv], av) )
        logp_obs_given_cuts = logp_obs_given(cnt, p_ab_given_cuts)
        alternatives=[p_ab_given_toucha, p_ab_given_touchb]
        for (i,alternative) in enumerate(alternatives):
            logp_obs_given_alt = logp_obs_given(cnt, alternative)
            logbf = logp_obs_given_cuts - logp_obs_given_alt
            logbfs[i] += logbf
    return exp(min(logbfs))
def nconflicts(self, var, val, assignment):
    """Return the number of conflicts var=val has with other variables."""
    # Subclasses may implement this more efficiently
    def conflict(var2):
        return var2 in assignment and not self.constraints(var, val, var2, assignment[var2])
    self.nconflt = self.nconflt + count(conflict(v) for v in self.neighbors[var])
    return count(conflict(v) for v in self.neighbors[var])
def possible_words(rack, num):
    rack_count = count(rack)
    result = []
    for word in legal_words:
        if len(word) != num:
            continue
        if subset(count(word), rack_count):
            result.append(word)
    result = sorted(result, cmp=lambda x,y: cmp(len(x), len(y)))
    return result
def mle(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    p=struct(cause=p_cause, effect_given_cause=p_effect_given_cause)
    cnt = count(zip(effect, unknown))
    chi_indep = chi2_contingency(cnt)
    p.unknown_given_effect = [ float(cnt[0][1]) / sum(cnt[0]), float(cnt[1][1]) / sum(cnt[1]) ]
    cnt = count(zip(unknown))
    p.unknown = float(cnt[1]) / sum(cnt)
    p = get_joints_by_model(p)
    cnt = count(zip(cause, unknown))
    bayes_factor = get_factor(p, cnt)
    return struct(reject_indep=chi_indep, bayes_fwd_rev=bayes_factor)
def test(self, test_data):
    correct, total = 0, 0
    losses = list()
    self.model.eval()
    with torch.no_grad():
        for i, (X, y) in enumerate(test_data):
            X, y = X.cuda(self.gpu, non_blocking=True), y.cuda(self.gpu, non_blocking=True)
            n, crop, _, _, _ = X.shape
            outputs = []
            for j in range(crop):
                outputs.append(self.model(X[:, j, :, :, :]))
            outputs = torch.stack(outputs)
            output = torch.mean(outputs, dim=0)
            loss = self.loss_function(output, y)
            losses.append(loss.item())
            correct += utils.count(output, y)
            total += y.size(0)
    self.model.train()
    return (100 * correct / total, sum(losses) / len(losses))
def summarize(sensor, timeframe, start, end):
    # prepare the database schema to use
    if timeframe == "hour":
        key_to_read = sensor["db_sensor"]
        key_to_write = sensor["db_sensor"] + ":hour"
    elif timeframe == "day":
        key_to_read = sensor["db_sensor"] + ":hour:avg"
        key_to_write = sensor["db_sensor"] + ":day"
    # retrieve from the database the data based on the given timeframe
    data = db.rangebyscore(key_to_read, start, end, withscores=True)
    # split between values and timestamps
    values = []
    timestamps = []
    for i in range(0, len(data)):
        timestamps.append(data[i][0])
        values.append(data[i][1])
    # calculate the derived values
    timestamp = start
    min = avg = max = rate = sum = count = count_unique = "-"
    if "avg" in sensor["summarize"] and sensor["summarize"]["avg"]:
        # calculate avg
        avg = utils.avg(values)
        db.deletebyscore(key_to_write + ":avg", start, end)
        db.set(key_to_write + ":avg", avg, timestamp)
    if "min_max" in sensor["summarize"] and sensor["summarize"]["min_max"]:
        # calculate min
        min = utils.min(values)
        db.deletebyscore(key_to_write + ":min", start, end)
        db.set(key_to_write + ":min", min, timestamp)
        # calculate max
        max = utils.max(values)
        db.deletebyscore(key_to_write + ":max", start, end)
        db.set(key_to_write + ":max", max, timestamp)
    if "rate" in sensor["summarize"] and sensor["summarize"]["rate"]:
        # calculate the rate of change
        rate = utils.velocity(timestamps, values)
        db.deletebyscore(key_to_write + ":rate", start, end)
        db.set(key_to_write + ":rate", rate, timestamp)
    if "sum" in sensor["summarize"] and sensor["summarize"]["sum"]:
        # calculate the sum
        sum = utils.sum(values)
        db.deletebyscore(key_to_write + ":sum", start, end)
        db.set(key_to_write + ":sum", sum, timestamp)
    if "count" in sensor["summarize"] and sensor["summarize"]["count"]:
        # count the values
        count = utils.count(values)
        db.deletebyscore(key_to_write + ":count", start, end)
        db.set(key_to_write + ":count", count, timestamp)
    if "count_unique" in sensor["summarize"] and sensor["summarize"]["count_unique"]:
        # count the unique values
        count_unique = utils.count_unique(values)
        db.deletebyscore(key_to_write + ":count_unique", start, end)
        db.set(key_to_write + ":count_unique", count_unique, timestamp)
    log.debug("[" + sensor["module_id"] + "][" + sensor["group_id"] + "][" +
              sensor["sensor_id"] + "] (" + utils.timestamp2date(timestamp) +
              ") updating summary of the " + timeframe +
              " (min,avg,max,rate,sum,count,count_unique): (" + str(min) + "," +
              str(avg) + "," + str(max) + "," + str(rate) + "," + str(sum) + "," +
              str(count) + "," + str(count_unique) + ")")
def nconflicts(self, var, val, assignment):
    "Return the number of conflicts var=val has with other variables."
    # Subclasses may implement this more efficiently
    def conflict(var2):
        return (var2 in assignment and
                not self.constraints(var, val, var2, assignment[var2]))
    return count(conflict(v) for v in self.neighbors[var])
def model():
    from PIL import Image
    import torch
    import yaml
    import numpy as np
    from utils import import_mod
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt
    from data import showLabel
    from utils import count
    with open("./config/config.yaml") as f:
        args = yaml.load(f)
    params_file = "params/" + args["model"] + "/params.ckpt"
    img = Image.open("data/image/12.jpg")
    transform = transforms.Compose([
        transforms.Resize((128, 192)),
        transforms.ToTensor()
    ])
    inputs = transform(img)
    inputs = inputs.unsqueeze(dim=0)
    model = import_mod("model." + args["model"])()
    model.load_state_dict(torch.load(params_file), strict=False)
    outputs = model(inputs)
    _, outputs = torch.max(outputs, 1)
    outputs = outputs.squeeze()
    outputs = showLabel(outputs, show=True)
    plt.imshow(outputs)
    plt.show()
    num = count(outputs)
    return outputs, num
def gini(cls, dataset):
    rst = utils.count(dataset)
    gini = 1.0
    for r in rst:
        gini -= (rst[r] / len(dataset))**2
    return gini
def _evaluate(eval_tree, dataset):
    eval_tree.results = utils.count(dataset)
    eval_tree.error = 0
    for k, v in eval_tree.results.items():
        if k != eval_tree.result:
            eval_tree.error += v
    # Leaf node
    if not (eval_tree.true_branch or eval_tree.false_branch):
        return eval_tree.error
    true_set = []
    false_set = []
    for data in dataset:
        v = data[eval_tree.feature]
        if isinstance(v, int) or isinstance(v, float):
            if v >= eval_tree.value:
                true_set.append(data)
            else:
                false_set.append(data)
        else:
            if v == eval_tree.value:
                true_set.append(data)
            else:
                false_set.append(data)
    return cls.evaluate(eval_tree.true_branch, true_set) + \
        cls.evaluate(eval_tree.false_branch, false_set)
def play_game(players):
    game = Scrabble.Scrabble(len(players))
    turn = 0
    while game.get_winner() is None:
        current_player = players[game.current_player_index]
        print("\n\nTurn {}\n:".format(turn))
        print("Player {} evaluating {} moves".format(
            game.current_player_index, utils.count(bruteforcer.all_moves(game))))
        t0 = datetime.datetime.now()
        move = current_player.get_move(game)
        t1 = datetime.datetime.now()
        time = t1 - t0
        current_player.time_taken += time
        game.apply_move(move)
        print(game.board)
        turn += 1
        print("Player {} made {} points in time {}".format(
            game.current_player_index, game.last_move_score, time))
        for i in range(len(players)):
            print("Player {}: {}, rack: {}".format(
                i, game.players[i].score,
                "".join([tile.letter for tile in game.players[i].rack])))
    print("Winner: Player {}".format(game.get_winner()))
    for i, player in enumerate(players):
        print("Player {} total time: {}".format(i, player.time_taken))
    print("Total bingos: {}".format(game.bingo_count))
def num_legal_values(csp, var, assignment):
    if csp.curr_domains:
        return len(csp.curr_domains[var])
    else:
        return count(csp.nconflicts(var, val, assignment) == 0
                     for val in csp.domains[var])
def nconflicts(self, var, val, assignment):
    # Return the number of conflicts var=val has with other variables.
    def conflict(var2):
        return (var2 in assignment and
                not self.constraints(var, val, var2, assignment[var2]))
    # print('val: ', val)
    # print('var: ', var)
    # print(self.neighbors)
    return count(conflict(v) for v in self.neighbors[var])
def entropy(cls, dataset):
    log2 = lambda x: log(x) / log(2)
    rst = utils.count(dataset)
    entropy = 0.0
    for r in rst:
        p = float(rst[r]) / len(dataset)
        entropy -= p * log2(p)
    return entropy
def has_common_cause(vs, thresh):
    #print 'overall %s' % count(zip(*vs))
    counts = [ count(zip(*rotate(vs, i))) for i in range(3) ]
    counts2 = [ count(zip(vs[i], vs[(i+1)%3])) for i in range(3) ]
    for i in range(3):
        p = chi2_contingency(counts2[i])[1]
        if p >= thresh:
            return False
        #print 'checking corr %s' % counts[i]
        corr = False
        for sv in range(2):
            p = chi2_contingency(counts[i][sv])[1]
            #print p
            if p < thresh:
                corr = True
                break
        if not corr:
            return False
    return True
def conditional(a, b, cut):
    apart = count(zip(cut, a, b))
    together = count(zip(a,b))
    just_b = count(zip(b))
    p_b_given_a = [ float(row[1]) / sum(row) for row in together ]
    p_b = float(just_b[1]) / sum(just_b)
    print 'apart=%s p(b|a)=%s p(b)=%s' % (apart, p_b_given_a, p_b)
    score_sev = 0
    score_nsev = 0
    for cv in [0,1]:
        for av in [0,1]:
            for bv in [0,1]:
                if apart[cv][av][bv]==0:
                    continue
                score_sev += log(p_of_val(p_b, bv)) * apart[cv][av][bv]
                score_nsev += log(p_of_val(p_b_given_a[av], bv)) * apart[cv][av][bv]
                print 'cnt(a=%d,b=%d)=%d => %.3g / %.3g' % (av,bv,apart[cv][av][bv],score_sev,score_nsev)
        print
    return exp(score_sev - score_nsev)
def test_PropKB():
    kb = PropKB()
    assert count(kb.ask(expr) for expr in [A, C, D, E, Q]) is 0
    kb.tell(A & E)
    assert kb.ask(A) == kb.ask(E) == {}
    kb.tell(E |'==>'| C)
    assert kb.ask(C) == {}
    kb.retract(E)
    assert kb.ask(E) is False
    assert kb.ask(C) is False
def test_PropKB():
    kb = PropKB()
    assert count(kb.ask(expr) for expr in [A, C, D, E, Q]) is 0
    kb.tell(A & E)
    assert kb.ask(A) == kb.ask(E) == {}
    kb.tell(E | '==>' | C)
    assert kb.ask(C) == {}
    kb.retract(E)
    assert not kb.ask(E)
    assert not kb.ask(C)
def num_legal_values(csp, var, assignment):
    # remaining values that can still work for variables
    # print(csp.curr_domains)
    # csp.display(assignment)
    if csp.curr_domains:
        return len(csp.curr_domains[var])
    else:
        print(var)
        print(csp.domains)
        return count(csp.nconflicts(var, val, assignment) == 0
                     for val in csp.domains[var])
def __call__(self, fn, a, b, eps=1e-6):
    if self.method == 'dichotomy':
        algo = _dichotomy_search
    elif self.method == 'golden':
        algo = _gss
    elif self.method == 'fibonacci':
        algo = _fibonacci
    fn = count(fn)
    res = algo(fn, a, b, eps)
    res.update({'call_count': fn.count})
    return res
def nconflicts(self, var, val, assignment):
    """Return the number of conflicts var=val has with other variables."""
    # Subclasses may implement this more efficiently
    def conflict(var2):
        #if var2 in assignment:
        #    print('var %d :val %d, var2 %d: val %d' % (var, val, var2, assignment[var2]))
        #    print(var2 in assignment and
        #          not self.constraints(var, val, var2, assignment[var2]))
        return (var2 in assignment and
                not self.constraints(var, val, var2, assignment[var2]))
    return count(conflict(v) for v in self.neighbors[var])
def importAll(self, sink, limit=None):
    url = self.getNextURL()
    while url is not None and len(url) > 0 and ((not self.options.test) or len(self.entities) < 30):
        try:
            entities = self.getEntitiesFromURL(url)
            if not sink.addEntities(entities):
                utils.log("Error storing %d entities to %s from %s" % \
                    (utils.count(entities), str(sink), url))
        except:
            utils.log("Error crawling " + url + "\n")
            utils.printException()
def main():
    sc = pyspark.SparkContext.getOrCreate()
    sqlContext = pyspark.SQLContext.getOrCreate(sc)
    df = (
        sqlContext.read.format("bigquery")
        .option("viewsEnabled", "true")
        .option("table", "lor-data-platform-dev-f369:lor_dw.game_event")
        .load()
    )
    # print(df.count())
    print(count(df))
def montecarlo(a,b,cut,verbose=False):
    cntall = count(zip(a,b))
    cntcut = count(zip(cut,a,b))
    sumarr(cntall, 0.1)
    sumarr(cntcut, 0.1)
    # p_ab_given_indep = deepcopy(cntall)
    # mularr(p_ab_given_indep, 1.0/len(a))
    logbfs=[0,0]
    runs=10
    for cutv in range(2):
        cnt = cntcut[cutv]
        cnt_a = [sum(l) for l in cnt]
        cnt_b = [sum(l) for l in zip(*cnt)]
        tot = sum(cnt_a)
        p_ab_given_cuts = [[0,0],[0,0]]
        p_ab_given_toucha = [[0,0],[0,0]]
        p_ab_given_touchb = [[0,0],[0,0]]
        for i in range(runs):
            p_a = 1-beta(*cnt_a)
            p_b = 1-beta(*cnt_b)
            p_a_given_b = [ 1-beta(*l) for l in zip(*cntall) ]
            p_b_given_a = [ 1-beta(*l) for l in cntall ]
            for av in range(2):
                for bv in range(2):
                    p_ab_given_cuts[av][bv] += p_of_val(p_a, av) * p_of_val(p_b, bv) / runs
                    p_ab_given_toucha[av][bv] += ( p_of_val(p_a, av) * p_of_val(p_b_given_a[av], bv) ) / runs
                    p_ab_given_touchb[av][bv] += ( p_of_val(p_b, bv) * p_of_val(p_a_given_b[bv], av) ) / runs
        logp_obs_given_cuts = logp_obs_given(cnt, p_ab_given_cuts)
        alternatives=[p_ab_given_toucha, p_ab_given_touchb]
        for (i,alternative) in enumerate(alternatives):
            logp_obs_given_alt = logp_obs_given(cnt, alternative)
            logbf = logp_obs_given_cuts - logp_obs_given_alt
            logbfs[i] += logbf
    return exp(min(logbfs))
def _receive_message(self, msg):
    query, key_val, coupon = msg
    query_name = query.raw_query.name
    if query_name not in self.coupons:
        self.coupons[query_name] = {}
    if key_val not in self.coupons[query_name]:
        self.coupons[query_name][key_val] = [False, [False] * query.m]
    self.coupons[query_name][key_val][1][coupon] = True
    if count(self.coupons[query_name][key_val][1]) >= query.n and not self.coupons[query_name][key_val][0]:
        self.alert(query.raw_query, key_val)
        self.coupons[query_name][key_val][0] = True
def update_coupon_table(self, query, coupon, packet):
    query_name = query.raw_query.name
    if query_name not in self.coupons:
        self.coupons[query_name] = {}
    key_val = self.key_funcs[query.raw_query.key_index](packet)
    if key_val not in self.coupons[query_name]:
        self.coupons[query_name][key_val] = [False, [False] * query.m]
    self.coupons[query_name][key_val][1][coupon] = True
    if count(self.coupons[query_name][key_val][1]) >= query.n and not self.coupons[query_name][key_val][0]:
        self.report_key(query, key_val)
        self.coupons[query_name][key_val][0] = True
def part1():
    grid = read_input()
    changes = 1
    rounds = 0
    while changes != 0:
        grid, changes = play_round(grid)
        rounds += 1
        #print(rounds, changes)
    flat_grid = list(np.concatenate(grid).flat)
    occupied_cnt = count("#", flat_grid)
    print(f"Day 11, part 1: {occupied_cnt}")
def train(self, train_data, test_data, save, epochs, lr, momentum, weight_decay):
    self.model.train()
    optimizer = optim.SGD(self.model.parameters(), lr, momentum=momentum, weight_decay=weight_decay)
    for epoch in range(epochs):
        if epoch % self.epoch_print == 0:
            print('Epoch {} Started...'.format(epoch + 1))
        for i, (X, y) in enumerate(train_data):
            X, y = X.cuda(self.gpu, non_blocking=True), y.cuda(self.gpu, non_blocking=True)
            output = self.model(X)
            loss = self.loss_function(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % self.print_freq == 0:
                train_acc = 100 * utils.count(output, y) / y.size(0)
                test_acc, test_loss = self.test(test_data)
                self.train_losses.append(loss.item())
                self.train_acc.append(train_acc)
                self.test_losses.append(test_loss)
                self.test_acc.append(test_acc)
                if epoch % self.epoch_print == 0:
                    state = ('Iteration : {} - Train Loss : {:.4f}, Test Loss : {:.4f}, '
                             'Train Acc : {:.4f}, Test Acc : {:.4f}').format(
                                 i + 1, loss.item(), test_loss, train_acc, test_acc)
                    if test_acc > self.best_acc:
                        print()
                        print('*' * 35, 'Best Acc Updated', '*' * 35)
                        print(state)
                        self.best_acc = test_acc
                        if save:
                            torch.save(self.model.state_dict(), './best.pt')
                            print('Saved Best Model')
                    else:
                        print(state)
def questions(request, **kwargs):
    page_data = []
    session_data = {}
    if local.request.session['uid']:
        session_data['user_name'] = User.get_by_id(local.request.session['uid'])[0].name
    page = Page(session_data)
    if 'search' in request.args:
        questions_list = Question.search(request.args['search'])
        page.title = "Questions - '%s' - Meno" % request.args['search']
    if 'sort' in request.args:
        sorts = {
            'new': 'date_created',
        }
        sort_attr = sorts[request.args['sort']]
        questions_list = Question.get(order=(sort_attr, 'desc'), limit=30)
    else:
        page.title = 'Questions - Meno'
        questions_list = Question.get_latest(30)
    for question in questions_list:
        edit = question.latest_edit()[0]
        user = User.get_by_id(question.user_id)[0]
        age = question.age()
        stat = question.latest_status()[0]
        question_data = {
            'question_id': str(question.id),
            'user_id': str(question.user_id),
            'views': str(question.views),
            'votes': str(question.votes),
            'date_created': str(question.created),
            'category': str(Category.get_by_id(question.category_id)[0].name),
            'answers_count': str(count(question.answers())),
            'title': str(edit.title),
            'user': str(user.name),
            'status': str(stat.status),
            'age': str("Asked %sh %sm %ss ago" % (age[0], age[0], age[1])),
        }
        page_data.append(question_data)
    content = QuestionsList(page_data)
    local.request.session['last'] = request.base_url
    return respond(page.render(content))
def update_graph(results_dump):
    # Update conditions
    if results_dump is None:
        raise PreventUpdate
    results = json.loads(results_dump)
    # compute ticks
    start_year = results["start_year"]
    end_year = results["end_year"]
    all_x = [(year, month) for year in range(start_year, end_year + 1) for month in range(1, 12 + 1)]
    # compute values
    # as map so we add zeros where they miss (todo: could be done with pandas probably)
    data = results["data"]
    if (len(data)):
        df = pd.DataFrame(data)
        utils.split_date(df)
        df_per_month = utils.per_month(df)
        per_month_count = utils.count(df_per_month)
        y_map = {
            year_month: per_month_count["count"].loc[year_month]
            for year_month in per_month_count.index
        }
    else:
        # no data
        y_map = {}
    y = [
        y_map[year_month] if year_month in y_map else 0
        for year_month in all_x
    ]  # TODO: whats the correct value?
    x = ["{}-{}-01".format(year, month) for (year, month) in all_x]  # list(range(0, len(y)))
    return {
        "data": [{
            "x": x,
            "y": y,
            "type": "bar",
        }],
        "layout": {
            "xaxis": {
                "tickformat": "%Y/%m"
            }
        }
    }
def build_tree(cls, dataset, func):
    if len(dataset) == 0:
        return DecisionTree()
    best_gain = 0.0
    best_feature = None
    best_split = None
    cur_score = func(dataset)
    feature_cnt = len(dataset[0]) - 1
    results = utils.count(dataset)
    result = sorted(results.items(), key=lambda x: x[1], reverse=True)[0][0]
    error = 0
    for k, v in results.items():
        if k != result:
            error += v
    # Choose the best feature
    for i in range(feature_cnt):
        unique_values = list(set([data[i] for data in dataset]))
        for v in unique_values:
            true_set, false_set = cls._divide_set(dataset, i, v)
            p_true = float(len(true_set)) / len(dataset)
            p_false = 1 - p_true
            gain = cur_score - p_true * \
                func(true_set) - p_false * func(false_set)
            if gain > best_gain and len(true_set) and len(false_set):
                best_gain = gain
                best_feature = (i, v)
                best_split = (true_set, false_set)
    if not best_gain:
        return DecisionTree(result=result, results=results, error=error)
    true_branch = cls.build_tree(best_split[0], func)
    false_branch = cls.build_tree(best_split[1], func)
    return DecisionTree(feature=best_feature[0], value=best_feature[1], \
                        true_branch=true_branch, false_branch=false_branch, \
                        result=result, results=results, error=error)
def _hello_world():
    if request.method == 'POST':
        id = request.form["id"]
        linkdata = request.form["linkdata"]
        linktitle = request.form["linktitle"]
        try:
            model = data.requre(linkdata, id)
            if len(model) != 0:
                count_matrix, cosine_sim = utils.count(model)
                indices = []
                for i in range(0, len(model), 1):
                    indices.append(model[i]['Title'])
                title = data.title(linktitle, id)
                result = utils.recommend(title, cosine_sim, indices, model)
                return jsonify(result)
        except:
            return "Error"
    else:
        return "Hello, Flask!"
def test_emb(data):
    a = 0.01
    data_emb = {}
    ingredient_emb = np.load("ingredient_emb.npy", allow_pickle=True)
    ingredient_dict = count(MAX_VOCAB_SIZE, data)  # build the ingredient dictionary: key is the ingredient, value is its count
    for key in data:
        recipe_emb = {}
        for ingredient in data[key]:
            try:
                emb = ingredient_emb.item().get(ingredient) * a
                recipe_emb[ingredient] = emb / (a + int(ingredient_dict[ingredient]))
            except:
                continue
        recipe = np.zeros(EMBEDDING_SIZE)
        for ingredient in recipe_emb:
            recipe = recipe + recipe_emb[ingredient]
        recipe = recipe / len(recipe_emb)
        data_emb[key] = recipe
    # np.save("recipe_emb_test.npy", data_emb)  # save the generated vectors to a dictionary file
    return data_emb
def _sample(self, iterable, func, print_progress=True, progress_delta=5, max_retries=0, retry_delay=0.05):
    progress_count = 100 / progress_delta
    ratio = self.options.sampleSetRatio
    count = 0
    index = 0
    try:
        count = len(iterable)
    except:
        try:
            count = iterable.count()
        except:
            count = utils.count(iterable)
    for obj in iterable:
        if print_progress and (count < progress_count or 0 == (index % (count / progress_count))):
            utils.log("%s : %s" % (self.__class__.__name__, utils.getStatusStr(index, count)))
        if random.random() < ratio:
            noop = self.options.noop
            retries = 0
            while True:
                try:
                    self.options.noop = (retries < max_retries) or noop
                    func(obj)
                    break
                except Exception, e:
                    utils.printException()
                    retries += 1
                    if noop or retries > max_retries:
                        prefix = "ERROR" if noop else "UNRESOLVABLE ERROR"
                        utils.log("%s: %s" % (prefix, str(e)))
                        break
                    time.sleep(retry_delay)
                    retry_delay *= 2
                finally:
                    self.options.noop = noop
def random_walk(board, start, trail, turn_func, end_func, orth, length=None):
    if isinstance(turn_func, float):
        turn_func = prob(turn_func)
    if length:
        end_func = lambda x: x > length
    elif isinstance(end_func, float):
        end_func = prob(end_func)
    pos = start
    # mark the start position on the board
    step = random_step(board, pos, orth)
    for l in count():
        valid_steps = available_steps(board, pos, orth)
        if not valid_steps:
            break
        # Add noise around the position so separate trails do not run together
        for s in valid_steps:
            board[pos[0] + s[0]][pos[1] + s[1]] = .1
        # Stop if we hit something
        if step not in valid_steps:
            break
        pos[0] += step[0]
        pos[1] += step[1]
        board[pos[0]][pos[1]] = trail
        if end_func(l):
            break
        if turn_func(l):
            step = random_step(board, pos, orth)
    return pos
def _is_survivor(self, cell, neighbors_and_me):
    return (cell in self.live_cells and
            EXTINCTION_THRESHOLD < count(neighbors_and_me) - 1 < OVERPOPULATION_THRESHOLD)
else:
    logloss += log(1-post)

for i in range(10000):
    net=struct()
    # net.a = (random()*.4)+.4
    # net.b_given_a=[random()*.6+.2]
    # net.b_given_a.append((net.b_given_a[0]+(random()*.4+.4))%1)
    # c_given_b=[random()*.6+.2]
    # c_given_b.append((c_given_b[0]+(random()*.4+.4))%1)
    net.a = random()
    net.b_given_a = [random(), random()]
    c_given_b = [random(), random()]
    net.c_given_ab = [c_given_b] * 2
    data = simulate(net, 100)
    cnt = count(zip(*data))
    if any(cnt, lambda(x):x==0):
        continue
    try:
        # print "--- %s ---" % net
        # sev = severs(data[0], data[1], data[2])
        # record(sev, False)
        # if sev>10:
        #     print net
        # print "Severs:"
        # sev = severs(data[0], data[1], data[2])
        # record(sev, False)
        # sev = severs(data[1], data[2], data[0])
        # record(sev, False)
        sev = severs(data[0], data[2], data[1])
        record(sev, True)
def p_given(a,b):
    cnt = count(zip(b,a))
    return [ float(x[1]) / sum(x) for x in cnt ]
#!/usr/bin/python
from simulate_causal_net import create_big_net, simulate_big_net
from scipy.stats import chi2_contingency
from utils import count
from trio_test import has_common_cause

vs = 6
net = create_big_net(vs, 3)
data = simulate_big_net(net, 1000)
for i in range(vs):
    for j in range(vs):
        if net[i][j]:
            cnt = count(zip(data[i], data[j]))
            p = chi2_contingency(cnt)[1]
            print '%d->%d %.2f p<%.2f' % (i,j,net[i][j], p)
for i in range(vs):
    for j in range(vs):
        if i!=j and not net[i][j]:
            cnt = count(zip(data[i], data[j]))
            p = chi2_contingency(cnt)[1]
            if p<.05:
                print '%d,%d correlate' % (i,j)
for i in range(vs):
    for j in range(i):
        for k in range(j):
def testCount(self):
    yes = utils.count('playtennis', 'Yes', self.dataset)
    no = utils.count('playtennis', 'No', self.dataset)
    assert yes == 9
    assert no == 5
def _processItems(self, items):
    utils.log("[%s] processing %d items" % (self, utils.count(items)))
    AEntityProxy._processItems(self, items)
def feature(t_begin, t_end, screen_names):
    ngram = {}
    table = {}
    for j, u in enumerate(screen_names):
        query = {
            'created_at': {
                '$gt': t_begin,
                '$lt': t_end
            },
            'screen_name': u
        }
        for item in db.find(query):
            text = item['text']
            id = item['id']
            try:
                replied_id = item['in_reply_to_status_id']
                if replied_id:
                    for ii in db.find({ 'id': replied_id }):
                        text += u'。%s' % ii['text']
            except KeyError:
                pass
            """
            feats = bow.bagofwords(text)
            for f in feats:
                print(' '.join(f))
            continue
            """
            feat = extractd.getngram(text)
            for w in set(feat):
                if len(unicode(w)) < 2:
                    continue
                if len(patterns.hiragana.findall(unicode(w))[0]) == len(unicode(w)):
                    continue
                if w in patterns.english_words:
                    continue
                if not w in ngram:
                    ngram[w] = {}
                utils.count(ngram[w], u)
                try:
                    #table[w].append(text)
                    table[w].add(id)
                except KeyError:
                    #table[w] = [ text ]
                    table[w] = set([ id ])
            tags = extractd.gethashtags(item)
            for t in set(tags):
                if not t in ngram:
                    ngram[t] = {}
                utils.count(ngram[t], u)
                try:
                    #table[t].append(text)
                    table[t].add(id)
                except KeyError:
                    #table[t] = [ text ]
                    table[t] = set([ id ])
            urls = extractd.geturls(item)
            for l in set(urls):
                if not l in ngram:
                    ngram[l] = {}
                utils.count(ngram[l], u)
                try:
                    #table[l].append(text)
                    table[l].add(id)
                except KeyError:
                    #table[l] = [ text ]
                    table[l] = set([ id ])
        print('%d/%d' % (j, len(screen_names)))
    return ngram, table
cats=20

def record(sev, truth):
    post = 1.0 / (1 + 1/sev) # prior=1/2
    rpost = round(post*cats)
    tot[rpost] += 1
    if truth:
        hit[rpost] += 1

for i in range(1):
    net=struct()
    net.a = random()
    net.b_given_a=[random(), random()]
    c_given_b = [random(), random()]
    net.c_given_ab = [c_given_b] * 2
    net.d_given_a = [random(), random()]
    data = simulate(net, 1000)
    cnt = count(zip(*data))
    if any(cnt, lambda(x):x==0):
        print 'net: %s' % net
        print 'skipping %s' % cnt
        continue
    bf = has_common_cause_bf(data[0:3])
    record(bf, False)
    bf = has_common_cause_bf(data[1:4])
    record(bf, True)

for i in range(cats+1):
    print '%f: %f n= %d' % (i/float(cats), tot[i] and float(hit[i])/tot[i] or -1, tot[i])
def test_count():
    import cv2
    import matplotlib.pyplot as plt
    img = cv2.imread("data/result/1.jpg")
    from utils import count
    print(count(img))
def num_legal_values(csp, var, assignment):
    if csp.curr_domains:
        return len(csp.curr_domains[var])
    else:
        return count(csp.nconflicts(var, val, assignment) == 0
                     for val in csp.domains[var])
def _is_newborn(self, cell, neighbors):
    return cell not in self.live_cells and count(neighbors) == REPRODUCTION_CONDITION
import json
import utils

if __name__ == '__main__':
    cost = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]
            data = utils.read_data(group, 'edgeDisjointPaths', topology)
            assert data != None, topology
            res = data['results']
            for r in res:
                cost.append(r['maxSeg'])
    cost = utils.count(cost)
    cost = utils.dict_to_bar(cost)
    cost = utils.array_to_percent(cost)
    s = 0
    for i in range(6, len(cost)):
        s += cost[i]
    print(s)
    groups = [str(i) for i in range(len(cost))]
    utils.make_g_barplot([cost], groups, ['seg cost'], ['#3CAEA3'], 'segment cost',
                         'percentage of topologies', '', '../data/plot/minCostEDP_segcost.eps', 5)
    """
    ax = plt.subplot()
    plot(x, y)
    plt.xlabel("Topology size |G|")
    plt.ylabel("Runtime in seconds")
for species in data.bacteria:
    vals = data.get_data(pl, 'fractions', species, bucketizer=lambda(x):int(log(x,10)))
    ent[species] = utils.entropy(vals)
spe=ent.keys()
spe.sort(key=lambda x: ent[x], reverse=True)
species_by_entropy = spe

pvs={}
for species in data.bacteria:
    vals = data.get_data(pl, 'fractions', species)
    co = utils.findcutoff(vals, sick)
    if co.sick_when_more==None:
        continue
    boolvals = [(i>co.threshold)==(co.sick_when_more) for i in vals]
    cnts = utils.count(zip(sick, boolvals))
    try:
        pvs[species]=chi2_contingency(cnts)[1]
    except ValueError:
        pass
spe=pvs.keys()
spe.sort(key=lambda x: pvs[x])
filt2 = [x for x in species_by_entropy if x in pvs and pvs[x]<.1]
print len(species_by_entropy)
print len(pvs)
print len(ent)
print len(filt2)
examples=[filt2[i] for i in range(0,20,3)]
#
import utils
import manip
import enrich
import numpy as np
import pandas as pd

utils.block_operator("data/SampleTextFile_1000kb.txt", utils.do_func(print))
print("\n")
print("Counting words in file:")
print("Words: ")
print(str(utils.count("data/SampleTextFile_1000kb.txt", "word")))
print("Lines: ")
print(str(utils.count("data/SampleTextFile_1000kb.txt", "line")))
print("All Characters in document: ")
print(str(utils.count("data/SampleTextFile_1000kb.txt")))
print("\n")
print("Use a regex to count the number of white spaces in the file:")
print(str(manip.count_matches("data/SampleTextFile_1000kb.txt", r'\s')))
print("Count the number of times 'er' appears at the end of a word:")
print(str(manip.count_matches("data/SampleTextFile_1000kb.txt", r'er\b')))
print("\n")
print("Substitute all instances of 'er' at a word-boundary with 'as'")
manip.sub_and_write("data/SampleTextFile_1000kb.txt", r'er\b', "as")
def num_legal_values(csp, var, assignment, forward_checking):
    if csp.curr_domains and forward_checking:
        return len(csp.curr_domains[var])
    else:
        return count(csp.nconflicts(var, val, assignment) == 0
                     for val in csp.domains[var])
print 'loading words...'
words = [line.strip().upper() for line in open('TWL06.txt')]
print 'done.'

'''
Looks like this expects you to tell it who owns each tile, after loading the board.
TODO(durandal): document this?
'''

import sys
board = None
while True:
    print '$ ',
    line = sys.stdin.readline()
    if not line:
        break
    line = line.strip()
    if line[0] is '`':
        board = open('games/%s.txt' % line[1:]).readline().strip().upper()
        print '"%s"' % board
        continue
    available = count_fancy(board, line)
    for k,v in available.iteritems():
        available[k].sort()
        available[k].reverse()
    print available
    legal = [(score_play(word,available), word) for word in words if is_subset(count(word), available)]
    if len(legal) == 0:
        print 'no legal moves'
    for score,word in sorted(legal):
        print score,word