Exemplo n.º 1
0
def chi2_dir(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    """Score the cause/unknown table against a cause -> effect -> unknown chain.

    Args:
        cause, effect, unknown: parallel sequences of observations; the code
            indexes the resulting tables with 0/1, so binary values are assumed.
        n: number of observations, used to scale the expected counts.
        p_cause: probability passed to ``p_of_val`` for the cause.
        p_effect_given_cause: per-cause-value probabilities for the effect.

    Returns:
        A ``struct`` with ``reject_indep`` (element 1 of the
        ``chi2_contingency`` result for effect vs. unknown), ``bayes_fwd_rev``
        (ratio of chi-square densities with 1 degree of freedom),
        ``reject_fwd`` and ``reject_rev``.
    """
    effect_unknown = count(zip(effect, unknown))
    indep_pvalue = chi2_contingency(effect_unknown)[1]

    # P(unknown=1 | effect=e) estimated from each row of the effect/unknown table.
    p_unknown_given_effect = [
        float(effect_unknown[e][1]) / sum(effect_unknown[e]) for e in range(2)
    ]

    # Expected cause/unknown counts if unknown depends on cause only through
    # effect: sum the chain probability over both effect values.
    expected = [[0, 0], [0, 0]]
    for c in range(2):
        for u in range(2):
            for e in range(2):
                expected[c][u] += (n *
                                   p_of_val(p_cause, c) *
                                   p_of_val(p_effect_given_cause[c], e) *
                                   p_of_val(p_unknown_given_effect[e], u))

    observed = count(zip(cause, unknown))
    reverse_test = chisquare(observed, expected, axis=None, ddof=2)
    forward_test = chi2_contingency(observed)
    factor = chi2.pdf(forward_test[0], 1) / chi2.pdf(reverse_test.statistic, 1)
    return struct(reject_indep=indep_pvalue,
                  bayes_fwd_rev=factor,
                  reject_fwd=forward_test[1],
                  reject_rev=reverse_test.pvalue)
Exemplo n.º 2
0
def montecarlo(cause, effect, unknown, n, *ignore):
    """Estimate a Bayes factor from joint tables averaged over sampled parameters.

    For ``rounds`` iterations, probabilities are drawn as ``1 - beta(*counts)``
    from the smoothed count tables, turned into joint tables by
    ``get_joints_by_model`` and accumulated; the averaged tables are then
    compared against the observed cause/unknown counts by ``get_factor``.

    Args:
        cause, effect, unknown: parallel observation sequences.
        n: unused here; kept for signature compatibility with sibling scorers.
        *ignore: extra positional arguments are accepted and ignored.

    Returns:
        A ``struct`` with the single field ``bayes_fwd_rev``.

    Raises:
        ValueError: re-raised from ``get_factor`` after dumping the averaged
            tables to stdout.
    """
    cnt_cause = count(zip(cause))
    cnt_unknown = count(zip(unknown))
    cnt_cause_effect = count(zip(cause, effect))
    cnt_effect_unknown = count(zip(effect, unknown))
    sumarr(cnt_cause_effect, 0.1) # make beta dist work with zeros
    sumarr(cnt_cause, 0.1)
    sumarr(cnt_unknown, 0.1)
    sumarr(cnt_effect_unknown, 0.1)
    # The observed table is counted after smoothing and is not smoothed itself.
    cnt_cause_unknown = count(zip(cause, unknown))
    rounds = 500
    p_overall = struct(cause_unknown_chain=[[0,0],[0,0]],
                       cause_unknown_collide=[[0,0],[0,0]])
    for i in range(rounds):
        p=struct()
        p.cause = 1-beta(*cnt_cause)
        p.unknown = 1-beta(*cnt_unknown)
        p.effect_given_cause = [1-beta(*cnts) for cnts in cnt_cause_effect]
        p.unknown_given_effect = [1-beta(*cnts) for cnts in cnt_effect_unknown]
        p = get_joints_by_model(p)
        acclarr(p_overall.cause_unknown_chain, p.cause_unknown_chain)
        acclarr(p_overall.cause_unknown_collide, p.cause_unknown_collide)
    mularr(p_overall.cause_unknown_chain, 1.0/rounds)
    mularr(p_overall.cause_unknown_collide, 1.0/rounds)
    try:
        bayes_factor = get_factor(p_overall, cnt_cause_unknown)
    except ValueError:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print('==ValueError==')
        print(p_overall.__dict__)
        # Bare raise keeps the original message and traceback.
        raise
    return struct(bayes_fwd_rev=bayes_factor)
Exemplo n.º 3
0
def makegraph(items):
    """Build a weighted graph from the links found in ``items``.

    Every link returned by ``extractd.getmessages(item)`` contributes an edge
    between the ids of its two endpoints (self-links are skipped) and bumps the
    counters for both directions. Each edge gets the smaller of its two
    directional counters as weight, the edges are passed through
    ``utils.filter_gt(..., 2)``, and the survivors form the returned graph.

    Returns:
        ``(bigraph, n2i)``: the filtered weighted graph and the mapping that
        was filled in by ``utils.getid``.
    """
    graph = networkx.Graph()
    weights = {}
    n2i = {}

    for item in items:
        for link in extractd.getmessages(item):
            src, dst = link[0], link[1]
            if src == dst:
                continue

            src_id = utils.getid(n2i, src)
            dst_id = utils.getid(n2i, dst)
            graph.add_edge(src_id, dst_id)

            utils.count(weights, (src_id, dst_id))
            utils.count(weights, (dst_id, src_id))

    weighted_edges = {}
    for edge in graph.edges():
        a, b = edge[0], edge[1]
        weighted_edges[(a, b)] = min(weights[(a, b)], weights[(b, a)])

    edges = utils.filter_gt(weighted_edges, 2)

    bigraph = networkx.Graph()
    for edge in edges:
        bigraph.add_edge(edge[0], edge[1], weight=edges[edge])

    return bigraph, n2i
Exemplo n.º 4
0
def play_round(grid):
    """Apply one simultaneous update of the seating grid.

    Cell values: ``"L"`` empty seat, ``"#"`` occupied seat, ``"."`` floor.
    An empty seat with no occupied neighbours becomes occupied; an occupied
    seat with four or more occupied neighbours becomes empty. Neighbours are
    always read from the input ``grid``, never from the grid being built.

    Returns:
        ``(next_grid, changes)``: a deep copy of ``grid`` with the updates
        applied, and the number of cells that changed.
    """
    updated = copy.deepcopy(grid)
    flipped = 0

    for y, row in enumerate(grid):
        # Width is taken from the first row, as in every other row.
        for x in range(len(grid[0])):
            seat = row[x]
            if seat != "L" and seat != "#":
                continue  # floor never changes

            occupied = count("#", get_adjacent(grid, x, y))
            if seat == "L":
                if occupied == 0:
                    updated[y][x] = "#"
                    flipped += 1
            elif occupied >= 4:
                updated[y][x] = "L"
                flipped += 1

    return updated, flipped
Exemplo n.º 5
0
def mle(a, b, cut, verbose=False):
    """Compare an "independent" model of (a, b) against two dependent ones.

    Conditional probabilities P(b|a) and P(a|b) are estimated from the
    smoothed overall a/b table. For each value of ``cut`` the per-cut table is
    scored (via ``logp_obs_given``) under a model where a and b are
    independent and under two alternatives that reuse the overall conditionals;
    the log ratios are summed over both cut values.

    Args:
        a, b, cut: parallel observation sequences (tables are indexed 0/1).
        verbose: unused.

    Returns:
        ``exp`` of the smaller of the two accumulated log ratios.
    """
    joint = count(zip(a, b))
    joint_by_cut = count(zip(cut, a, b))
    sumarr(joint, 0.1)
    sumarr(joint_by_cut, 0.1)
    p_b_given_a = [float(row[1]) / sum(row) for row in joint]
    p_a_given_b = [float(col[1]) / sum(col) for col in zip(*joint)]

    log_ratios = [0, 0]
    for cutv in range(2):
        table = joint_by_cut[cutv]
        total = sum(sum(row) for row in table)
        p_a = float(sum(table[1])) / total
        p_b = float(table[0][1] + table[1][1]) / total

        independent = [[0, 0], [0, 0]]
        touch_a = [[0, 0], [0, 0]]
        touch_b = [[0, 0], [0, 0]]
        for av in range(2):
            pa = p_of_val(p_a, av)
            for bv in range(2):
                pb = p_of_val(p_b, bv)
                independent[av][bv] = pa * pb
                touch_a[av][bv] = pa * p_of_val(p_b_given_a[av], bv)
                touch_b[av][bv] = pb * p_of_val(p_a_given_b[bv], av)

        baseline = logp_obs_given(table, independent)
        for idx, alternative in enumerate([touch_a, touch_b]):
            log_ratios[idx] += baseline - logp_obs_given(table, alternative)
    return exp(min(log_ratios))
Exemplo n.º 6
0
    def nconflicts(self, var, val, assignment):
        """Return the number of conflicts var=val has with other variables.

        The same number is also added to the running total ``self.nconflt``.
        """

        # Subclasses may implement this more efficiently
        def conflict(var2):
            return var2 in assignment and not self.constraints(var, val, var2, assignment[var2])

        # Count once and reuse the result: the original evaluated every
        # constraint twice (once for the total, once for the return value).
        conflicts = count(conflict(v) for v in self.neighbors[var])
        self.nconflt = self.nconflt + conflicts
        return conflicts
Exemplo n.º 7
0
def possible_words(rack, num):
    """Return the legal words of length ``num`` that pass the rack check.

    A word is kept when ``subset(count(word), count(rack))`` is true.

    Args:
        rack: the available letters.
        num: required word length.

    Returns:
        List of matching words from ``legal_words``, in their original
        relative order.
    """
    rack_count = count(rack)
    result = [word for word in legal_words
              if len(word) == num and subset(count(word), rack_count)]
    # ``sorted(cmp=...)`` was removed in Python 3; ``key=len`` is the same
    # stable length ordering and also works on Python 2.
    return sorted(result, key=len)
Exemplo n.º 8
0
def mle(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    """Compute a Bayes factor from point estimates of the model parameters.

    P(unknown | effect) and P(unknown) are estimated directly from the count
    tables, combined with the supplied cause probabilities by
    ``get_joints_by_model``, and compared with the observed cause/unknown
    table by ``get_factor``.

    Args:
        cause, effect, unknown: parallel observation sequences.
        n: unused.
        p_cause: stored as ``p.cause``.
        p_effect_given_cause: stored as ``p.effect_given_cause``.

    Returns:
        A ``struct`` with ``reject_indep`` and ``bayes_fwd_rev``.
    """
    params = struct(cause=p_cause, effect_given_cause=p_effect_given_cause)

    effect_unknown = count(zip(effect, unknown))
    # NOTE(review): this stores the whole chi2_contingency result, whereas
    # chi2_dir stores only element [1] under the same field name - confirm
    # which one callers expect.
    independence = chi2_contingency(effect_unknown)
    params.unknown_given_effect = [
        float(effect_unknown[e][1]) / sum(effect_unknown[e]) for e in range(2)
    ]

    unknown_counts = count(zip(unknown))
    params.unknown = float(unknown_counts[1]) / sum(unknown_counts)

    params = get_joints_by_model(params)
    observed = count(zip(cause, unknown))
    factor = get_factor(params, observed)
    return struct(reject_indep=independence,
                  bayes_fwd_rev=factor)
Exemplo n.º 9
0
    def test(self, test_data):
        """Evaluate the model on ``test_data`` with per-crop averaging.

        Each batch ``X`` is unpacked as a 5-D tensor whose second dimension
        indexes crops; the model is run on every crop and the outputs are
        averaged before computing loss and the correct-prediction count.
        The model is switched to eval mode for the pass and back to train
        mode before returning.

        Returns:
            ``(accuracy_percent, mean_loss)`` over all batches.
        """
        hits = 0
        seen = 0
        batch_losses = []

        self.model.eval()
        with torch.no_grad():
            for X, y in test_data:
                X = X.cuda(self.gpu, non_blocking=True)
                y = y.cuda(self.gpu, non_blocking=True)

                # Unpacking doubles as a check that X has exactly five dims.
                _, crop, _, _, _ = X.shape
                per_crop = torch.stack(
                    [self.model(X[:, j, :, :, :]) for j in range(crop)])
                output = torch.mean(per_crop, dim=0)

                batch_losses.append(self.loss_function(output, y).item())

                hits += utils.count(output, y)
                seen += y.size(0)

        self.model.train()
        accuracy = 100 * hits / seen
        mean_loss = sum(batch_losses) / len(batch_losses)
        return (accuracy, mean_loss)
Exemplo n.º 10
0
def summarize(sensor, timeframe, start, end):
    """Compute and store the summary statistics of a sensor for one timeframe.

    Reads the raw (``"hour"``) or hourly-average (``"day"``) series between
    ``start`` and ``end``, then for every statistic enabled in
    ``sensor["summarize"]`` replaces the stored value at timestamp ``start``.
    The ``min_max`` option drives both the ``min`` and the ``max`` statistic.

    Args:
        sensor: sensor configuration; ``db_sensor``, ``summarize``,
            ``module_id``, ``group_id`` and ``sensor_id`` are read.
        timeframe: ``"hour"`` or ``"day"``.
        start, end: score range passed to the database calls.

    Raises:
        ValueError: if ``timeframe`` is not ``"hour"`` or ``"day"``
            (previously this surfaced as an UnboundLocalError).
    """
    # prepare the database schema to use
    if timeframe == "hour":
        key_to_read = sensor["db_sensor"]
        key_to_write = sensor["db_sensor"] + ":hour"
    elif timeframe == "day":
        key_to_read = sensor["db_sensor"] + ":hour:avg"
        key_to_write = sensor["db_sensor"] + ":day"
    else:
        raise ValueError("unsupported timeframe: " + str(timeframe))
    # retrieve from the database the data based on the given timeframe
    data = db.rangebyscore(key_to_read, start, end, withscores=True)
    # split between values and timestamps
    timestamps = [entry[0] for entry in data]
    values = [entry[1] for entry in data]
    timestamp = start
    enabled = sensor["summarize"]
    # (config option, stored key suffix, lazy calculation), in storage order.
    # Lambdas keep each utils lookup lazy, so disabled statistics cost nothing.
    calculations = (
        ("avg", "avg", lambda: utils.avg(values)),
        ("min_max", "min", lambda: utils.min(values)),
        ("min_max", "max", lambda: utils.max(values)),
        ("rate", "rate", lambda: utils.velocity(timestamps, values)),
        ("sum", "sum", lambda: utils.sum(values)),
        ("count", "count", lambda: utils.count(values)),
        ("count_unique", "count_unique",
         lambda: utils.count_unique(values)),
    )
    # calculate the derived values; "-" marks a statistic that is disabled
    summary = {}
    for option, name, calculate in calculations:
        if option in enabled and enabled[option]:
            result = calculate()
            db.deletebyscore(key_to_write + ":" + name, start, end)
            db.set(key_to_write + ":" + name, result, timestamp)
            summary[name] = result
        else:
            summary[name] = "-"
    reported = ("min", "avg", "max", "rate", "sum", "count", "count_unique")
    log.debug("[" + sensor["module_id"] + "][" + sensor["group_id"] + "][" +
              sensor["sensor_id"] + "] (" + utils.timestamp2date(timestamp) +
              ") updating summary of the " + timeframe +
              " (min,avg,max,rate,sum,count,count_unique): (" +
              ",".join(str(summary[name]) for name in reported) + ")")
Exemplo n.º 11
0
 def nconflicts(self, var, val, assignment):
     """Count the assigned neighbors of ``var`` whose value conflicts with ``var=val``."""
     # Subclasses may implement this more efficiently
     def conflict(var2):
         # A neighbor without a value yet cannot conflict.
         if var2 not in assignment:
             return False
         return not self.constraints(var, val, var2, assignment[var2])
     return count(conflict(v) for v in self.neighbors[var])
Exemplo n.º 12
0
def model():
    """Run the configured network on one sample image and display the result.

    Reads the model name from ``./config/config.yaml``, loads its weights
    from ``params/<model>/params.ckpt``, feeds ``data/image/12.jpg`` (resized
    to 128x192) through it, renders the per-pixel argmax with ``showLabel``
    and shows it with matplotlib.

    Returns:
        ``(outputs, num)``: the value returned by ``showLabel`` and the
        result of ``count`` on it.
    """
    from PIL import Image
    import torch
    import yaml
    from utils import import_mod
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt
    from data import showLabel
    from utils import count

    with open("./config/config.yaml") as f:
        # safe_load: plain yaml.load() without a Loader can construct
        # arbitrary Python objects and is rejected by current PyYAML.
        args = yaml.safe_load(f)
    params_file = "params/" + args["model"] + "/params.ckpt"

    img = Image.open("data/image/12.jpg")
    transform = transforms.Compose([
        transforms.Resize((128, 192)),
        transforms.ToTensor(),
    ])
    inputs = transform(img)
    inputs = inputs.unsqueeze(dim=0)  # add the batch dimension
    net = import_mod("model." + args["model"])()
    net.load_state_dict(torch.load(params_file), strict=False)
    outputs = net(inputs)
    # Keep only the index of the highest-scoring channel along dim 1.
    _, outputs = torch.max(outputs, 1)
    outputs = outputs.squeeze()
    outputs = showLabel(outputs, show=True)
    plt.imshow(outputs)
    plt.show()
    num = count(outputs)
    return outputs, num
Exemplo n.º 13
0
    def gini(cls, dataset):
        """Return the Gini impurity of ``dataset``.

        Computed as one minus the sum of squared shares, where the shares are
        the values of ``utils.count(dataset)`` divided by ``len(dataset)``.
        """
        rst = utils.count(dataset)
        # Float denominator: under Python 2 the integer division would
        # truncate every share to 0 (the sibling entropy() casts as well).
        total = float(len(dataset))
        gini = 1.0

        for r in rst:
            gini -= (rst[r] / total) ** 2
        return gini
Exemplo n.º 14
0
        def _evaluate(eval_tree, dataset):
            """Annotate ``eval_tree`` with counts/errors for ``dataset`` and return the error.

            The node's ``results`` become ``utils.count(dataset)`` and its
            ``error`` the total count of everything except ``eval_tree.result``.
            A node without branches returns its own error; otherwise the rows
            are split on the node's feature/value and the errors returned by
            ``cls.evaluate`` for both branches are added.
            """
            eval_tree.results = utils.count(dataset)
            eval_tree.error = sum(
                n_rows for label, n_rows in eval_tree.results.items()
                if label != eval_tree.result)

            # Leaf node
            if not eval_tree.true_branch and not eval_tree.false_branch:
                return eval_tree.error

            true_set, false_set = [], []
            for row in dataset:
                cell = row[eval_tree.feature]
                # Numbers split on a threshold, everything else on equality.
                if isinstance(cell, (int, float)):
                    goes_true = cell >= eval_tree.value
                else:
                    goes_true = cell == eval_tree.value

                if goes_true:
                    true_set.append(row)
                else:
                    false_set.append(row)

            true_error = cls.evaluate(eval_tree.true_branch, true_set)
            false_error = cls.evaluate(eval_tree.false_branch, false_set)
            return true_error + false_error
Exemplo n.º 15
0
def play_game(players):
    """Play one Scrabble game between ``players`` and print a running log.

    Each turn the current player is asked for a move, the time spent in
    ``get_move`` is added to that player's ``time_taken``, and the move is
    applied. After the game the winner, per-player total times and the bingo
    count are printed.
    """
    game = Scrabble.Scrabble(len(players))

    turn = 0
    while game.get_winner() is None:
        mover_index = game.current_player_index
        current_player = players[mover_index]
        print("\n\nTurn {}\n:".format(turn))

        move_count = utils.count(bruteforcer.all_moves(game))
        print("Player {} evaluating {} moves".format(mover_index, move_count))

        started = datetime.datetime.now()
        move = current_player.get_move(game)
        elapsed = datetime.datetime.now() - started
        current_player.time_taken += elapsed

        game.apply_move(move)
        print(game.board)
        turn += 1
        # NOTE(review): the index is re-read after apply_move; if apply_move
        # advances the turn this names the next player, not the one who moved.
        print("Player {} made {} points in time {}".format(
            game.current_player_index, game.last_move_score, elapsed))
        for i, _ in enumerate(players):
            seat = game.players[i]
            print("Player {}: {}, rack: {}".format(
                i, seat.score, "".join(tile.letter for tile in seat.rack)))

    print("Winner: Player {}".format(game.get_winner()))
    for i, player in enumerate(players):
        print("Player {} total time: {}".format(i, player.time_taken))
    print("Total bingos: {}".format(game.bingo_count))
Exemplo n.º 16
0
def num_legal_values(csp, var, assignment):
    """Return how many values are still available for ``var``.

    Uses the size of ``csp.curr_domains[var]`` when current domains are being
    tracked; otherwise counts the values in ``csp.domains[var]`` that have no
    conflicts under ``assignment``.
    """
    if not csp.curr_domains:
        return count(
            csp.nconflicts(var, candidate, assignment) == 0
            for candidate in csp.domains[var])
    return len(csp.curr_domains[var])
Exemplo n.º 17
0
 def nconflicts(self, var, val, assignment):
     """Return the number of conflicts var=val has with other variables."""
     def violates(other):
         # Only neighbors that already hold a value are checked.
         return (other in assignment and
                 not self.constraints(var, val, other, assignment[other]))
     neighbours = self.neighbors[var]
     return count(violates(other) for other in neighbours)
Exemplo n.º 18
0
    def entropy(cls, dataset):
        """Return the entropy (in bits) of ``dataset``.

        Shares are the values of ``utils.count(dataset)`` divided by
        ``len(dataset)``; the result is ``-sum(p * log2(p))`` over them.
        """
        def log2(x):
            return log(x) / log(2)

        tallies = utils.count(dataset)
        total = 0.0
        for label in tallies:
            share = float(tallies[label]) / len(dataset)
            total -= share * log2(share)
        return total
Exemplo n.º 19
0
def has_common_cause(vs,thresh):
    """Check three variables with pairwise and conditional chi-square tests.

    For each of the three rotations the function requires that
    (1) the pair ``vs[i]``/``vs[(i+1) % 3]`` has a chi-square p-value below
    ``thresh``, and (2) at least one of the two sub-tables of the rotated
    three-way table also has a p-value below ``thresh``.

    Args:
        vs: three parallel observation sequences.
        thresh: p-value threshold.

    Returns:
        True only if every rotation passes both checks.
    """
    # Both families of tables are built up front, before any test is run.
    three_way = [count(zip(*rotate(vs, i))) for i in range(3)]
    pairwise = [count(zip(vs[i], vs[(i + 1) % 3])) for i in range(3)]

    for pair_table, split_table in zip(pairwise, three_way):
        if chi2_contingency(pair_table)[1] >= thresh:
            return False
        # any() stops at the first sub-table that shows a dependence.
        if not any(chi2_contingency(split_table[sv])[1] < thresh
                   for sv in range(2)):
            return False
    return True
Exemplo n.º 20
0
def conditional(a, b, cut):
    """Compare "b independent of a" with "b depends on a" on the cut-split data.

    P(b|a) and P(b) are estimated from the overall tables. Every non-empty
    cell of the (cut, a, b) table adds its count times the log-probability of
    ``b`` to two scores: one using P(b), one using P(b|a). Progress is printed
    to stdout.

    Returns:
        ``exp(score_sev - score_nsev)``, i.e. the likelihood ratio of the
        independent model over the dependent one.
    """
    apart = count(zip(cut, a, b))
    together = count(zip(a,b))
    just_b = count(zip(b))
    p_b_given_a = [ float(row[1]) / sum(row) for row in together ]
    p_b = float(just_b[1]) / sum(just_b)
    # Parenthesised single-argument prints behave the same on Python 2 and 3;
    # the original print statements were a SyntaxError on Python 3.
    print('apart=%s p(b|a)=%s p(b)=%s' % (apart, p_b_given_a, p_b))
    score_sev = 0
    score_nsev = 0
    for cv in [0,1]:
        for av in [0,1]:
            for bv in [0,1]:
                cell = apart[cv][av][bv]
                if cell==0:
                    # empty cells contribute nothing
                    continue
                score_sev += log(p_of_val(p_b, bv)) * cell
                score_nsev += log(p_of_val(p_b_given_a[av], bv)) * cell
                print('cnt(a=%d,b=%d)=%d => %.3g / %.3g' % (av,bv,cell,score_sev,score_nsev))
    print('')
    return exp(score_sev - score_nsev)
Exemplo n.º 21
0
def test_PropKB():
    """Exercise tell/ask/retract on a fresh PropKB."""
    kb = PropKB()
    # Compare by value: ``is 0`` tests object identity, which only works by
    # accident of small-int caching and triggers a SyntaxWarning on 3.8+.
    assert count(kb.ask(expr) for expr in [A, C, D, E, Q]) == 0
    kb.tell(A & E)
    assert kb.ask(A) == kb.ask(E) == {}
    kb.tell(E |'==>'| C)
    assert kb.ask(C) == {}
    kb.retract(E)
    assert kb.ask(E) is False
    assert kb.ask(C) is False
Exemplo n.º 22
0
def test_PropKB():
    """Check that PropKB answers follow what has been told and retracted."""
    kb = PropKB()
    # ``== 0`` instead of ``is 0``: identity comparison with an int literal
    # depends on interpreter caching and warns on Python 3.8+.
    assert count(kb.ask(expr) for expr in [A, C, D, E, Q]) == 0
    kb.tell(A & E)
    assert kb.ask(A) == kb.ask(E) == {}
    kb.tell(E | '==>' | C)
    assert kb.ask(C) == {}
    kb.retract(E)
    assert not kb.ask(E)
    assert not kb.ask(C)
Exemplo n.º 23
0
def num_legal_values(csp, var, assignment):
    """Return the number of values that can still work for ``var``.

    When ``csp.curr_domains`` is set, this is the size of the current domain;
    otherwise it is the number of values in ``csp.domains[var]`` for which
    ``csp.nconflicts`` reports zero conflicts under ``assignment``.
    """
    if csp.curr_domains:
        return len(csp.curr_domains[var])
    # The leftover debug prints of var and the full domain table were removed
    # from this fallback path.
    return count(
        csp.nconflicts(var, val, assignment) == 0
        for val in csp.domains[var])
Exemplo n.º 24
0
    def __call__(self, fn, a, b, eps=1e-6):
        """Run the configured search on ``fn`` over ``[a, b]``.

        ``fn`` is wrapped with ``count`` so that the number of evaluations can
        be reported alongside the algorithm's own result.

        Args:
            fn: function passed to the search algorithm.
            a, b: interval bounds passed to the algorithm.
            eps: tolerance passed to the algorithm.

        Returns:
            The algorithm's result dict, extended with ``'call_count'``.

        Raises:
            ValueError: if ``self.method`` is not ``'dichotomy'``, ``'golden'``
                or ``'fibonacci'`` (previously an UnboundLocalError).
        """
        if self.method == 'dichotomy':
            algo = _dichotomy_search
        elif self.method == 'golden':
            algo = _gss
        elif self.method == 'fibonacci':
            algo = _fibonacci
        else:
            raise ValueError("unknown search method: %r" % (self.method,))
        fn = count(fn)
        res = algo(fn, a, b, eps)
        res.update({'call_count': fn.count})
        return res
Exemplo n.º 25
0
    def nconflicts(self, var, val, assignment):
        """Return the number of conflicts var=val has with other variables.

        Only neighbors of ``var`` that already appear in ``assignment`` are
        checked against ``self.constraints``.
        """
        # Subclasses may implement this more efficiently
        return count(
            neighbor in assignment
            and not self.constraints(var, val, neighbor, assignment[neighbor])
            for neighbor in self.neighbors[var])
Exemplo n.º 26
0
 def importAll(self, sink, limit=None):
     """Fetch entities from the crawler's URL and hand them to ``sink``.

     ``sink.addEntities`` is called with the entities obtained from the URL;
     a falsy return value and any exception are logged rather than raised.
     ``limit`` is not used in the visible code.

     NOTE(review): ``url`` is never reassigned inside the loop as shown here,
     so the loop would repeat the same URL; the snippet may be truncated
     (e.g. a trailing ``url = self.getNextURL()``) - confirm against the
     full source.
     """
     url = self.getNextURL()
     
     # In test mode (self.options.test) the loop also stops once
     # self.entities holds 30 or more items.
     while url is not None and len(url) > 0 and ((not self.options.test) or len(self.entities) < 30):
         try:
             entities = self.getEntitiesFromURL(url)
             
             if not sink.addEntities(entities):
                 utils.log("Error storing %d entities to %s from %s" % \
                         (utils.count(entities), str(sink), url))
         except:
             # Best-effort crawl: log the failure and the traceback, keep going.
             utils.log("Error crawling " + url + "\n")
             utils.printException()
def main():
    """Load the ``game_event`` BigQuery table through Spark and print ``count(df)``."""
    sc = pyspark.SparkContext.getOrCreate()
    sqlContext = pyspark.SQLContext.getOrCreate(sc)

    # Same reader options as a single fluent chain, one step per line.
    reader = sqlContext.read.format("bigquery")
    reader = reader.option("viewsEnabled", "true")
    reader = reader.option("table", "lor-data-platform-dev-f369:lor_dw.game_event")
    df = reader.load()

    print(count(df))
Exemplo n.º 28
0
def montecarlo(a, b, cut, verbose=False):
    """Sampling variant of the independent-vs-dependent comparison of (a, b).

    For each value of ``cut``, probabilities are drawn ``runs`` times as
    ``1 - beta(*counts)`` and the resulting joint tables are averaged for
    three models: a and b independent, b given a, and a given b (the latter
    two use the overall a/b table). The per-cut table is then scored under
    each model with ``logp_obs_given`` and the log ratios are accumulated.

    Args:
        a, b, cut: parallel observation sequences (tables are indexed 0/1).
        verbose: unused.

    Returns:
        ``exp`` of the smaller of the two accumulated log ratios.
    """
    joint = count(zip(a, b))
    joint_by_cut = count(zip(cut, a, b))
    sumarr(joint, 0.1)
    sumarr(joint_by_cut, 0.1)

    log_ratios = [0, 0]
    runs = 10
    for cutv in range(2):
        table = joint_by_cut[cutv]
        a_totals = [sum(row) for row in table]
        b_totals = [sum(col) for col in zip(*table)]
        independent = [[0, 0], [0, 0]]
        touch_a = [[0, 0], [0, 0]]
        touch_b = [[0, 0], [0, 0]]
        for _ in range(runs):
            # Draw order is kept fixed so seeded runs stay reproducible.
            p_a = 1 - beta(*a_totals)
            p_b = 1 - beta(*b_totals)
            p_a_given_b = [1 - beta(*col) for col in zip(*joint)]
            p_b_given_a = [1 - beta(*row) for row in joint]
            for av in range(2):
                for bv in range(2):
                    independent[av][bv] += p_of_val(p_a, av) * p_of_val(p_b, bv) / runs
                    touch_a[av][bv] += (p_of_val(p_a, av) *
                                        p_of_val(p_b_given_a[av], bv)) / runs
                    touch_b[av][bv] += (p_of_val(p_b, bv) *
                                        p_of_val(p_a_given_b[bv], av)) / runs

        baseline = logp_obs_given(table, independent)
        for idx, alternative in enumerate([touch_a, touch_b]):
            log_ratios[idx] += baseline - logp_obs_given(table, alternative)
    return exp(min(log_ratios))
Exemplo n.º 29
0
    def _receive_message(self, msg):
        """Record a received coupon and alert once enough have been collected.

        ``msg`` is a ``(query, key_val, coupon)`` triple. Per query name and
        key, ``self.coupons`` keeps ``[alerted, flags]`` where ``flags`` has
        ``query.m`` entries. When ``count(flags)`` reaches ``query.n`` for the
        first time, ``self.alert`` is called and the entry is marked.
        """
        query, key_val, coupon = msg
        per_query = self.coupons.setdefault(query.raw_query.name, {})

        if key_val not in per_query:
            per_query[key_val] = [False, [False] * query.m]
        entry = per_query[key_val]

        entry[1][coupon] = True
        if count(entry[1]) >= query.n and not entry[0]:
            self.alert(query.raw_query, key_val)
            entry[0] = True
Exemplo n.º 30
0
    def update_coupon_table(self, query, coupon, packet):
        """Mark ``coupon`` as seen for the packet's key and report the key once.

        The key is extracted from ``packet`` with the key function selected by
        ``query.raw_query.key_index``. Each ``self.coupons[name][key]`` entry
        is ``[reported, flags]`` with ``query.m`` flags; when ``count(flags)``
        first reaches ``query.n``, ``self.report_key`` is called.
        """
        name = query.raw_query.name
        if name not in self.coupons:
            self.coupons[name] = {}
        table = self.coupons[name]

        extract_key = self.key_funcs[query.raw_query.key_index]
        key_val = extract_key(packet)
        if key_val not in table:
            table[key_val] = [False, [False] * query.m]
        state = table[key_val]

        collected = state[1]
        collected[coupon] = True
        threshold_met = count(collected) >= query.n
        if threshold_met and not state[0]:
            self.report_key(query, key_val)
            state[0] = True
Exemplo n.º 31
0
def part1():
    """Run rounds until the grid stops changing, then print the occupied-seat count."""
    grid = read_input()

    # At least one round is always played; stop after the first quiet one.
    while True:
        grid, changes = play_round(grid)
        if changes == 0:
            break

    cells = list(np.concatenate(grid).flat)
    occupied_cnt = count("#", cells)

    print(f"Day 11, part 1: {occupied_cnt}")
Exemplo n.º 32
0
    def train(self, train_data, test_data, save, epochs, lr, momentum,
              weight_decay):
        """Train the model with SGD, periodically evaluating on ``test_data``.

        Every ``self.print_freq`` iterations the training accuracy of the
        current batch and the test accuracy/loss are recorded in the history
        lists. On epochs selected by ``self.epoch_print`` the status line is
        printed, and a new best test accuracy updates ``self.best_acc`` and,
        if ``save`` is true, writes the weights to ``./best.pt``.
        """
        self.model.train()
        optimizer = optim.SGD(self.model.parameters(),
                              lr,
                              momentum=momentum,
                              weight_decay=weight_decay)

        for epoch in range(epochs):
            report_epoch = epoch % self.epoch_print == 0
            if report_epoch:
                print('Epoch {} Started...'.format(epoch + 1))

            for iteration, (X, y) in enumerate(train_data, 1):
                X = X.cuda(self.gpu, non_blocking=True)
                y = y.cuda(self.gpu, non_blocking=True)
                output = self.model(X)
                loss = self.loss_function(output, y)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if iteration % self.print_freq != 0:
                    continue

                train_acc = 100 * utils.count(output, y) / y.size(0)
                test_acc, test_loss = self.test(test_data)

                self.train_losses.append(loss.item())
                self.train_acc.append(train_acc)
                self.test_losses.append(test_loss)
                self.test_acc.append(test_acc)

                if not report_epoch:
                    continue

                state = (
                    'Iteration : {} - Train Loss : {:.4f}, Test Loss : {:.4f}, '
                    'Train Acc : {:.4f}, Test Acc : {:.4f}').format(
                        iteration, loss.item(), test_loss, train_acc,
                        test_acc)
                if test_acc <= self.best_acc:
                    print(state)
                    continue

                print()
                print('*' * 35, 'Best Acc Updated', '*' * 35)
                print(state)
                self.best_acc = test_acc
                if save:
                    torch.save(self.model.state_dict(), './best.pt')
                    print('Saved Best Model')
Exemplo n.º 33
0
def questions(request, **kwargs):
    """Render the questions list page.

    The list comes from ``Question.search`` when a ``search`` argument is
    given, from ``Question.get`` ordered by the requested attribute when a
    ``sort`` argument is given (which takes precedence if both are present),
    and from ``Question.get_latest(30)`` otherwise.

    Args:
        request: the current request; ``args`` and ``base_url`` are read.
        **kwargs: accepted and ignored.

    Returns:
        The response built by ``respond`` from the rendered page.
    """
    page_data = []
    session_data = {}
    if local.request.session['uid']:
        session_data['user_name'] = User.get_by_id(local.request.session['uid'])[0].name

    page = Page(session_data)
    searching = 'search' in request.args
    if searching:
        questions_list = Question.search(request.args['search'])
        page.title = "Questions - '%s' - Meno" % request.args['search']
    if 'sort' in request.args:
        sorts = {
            'new': 'date_created',
            }
        sort_attr = sorts[request.args['sort']]
        questions_list = Question.get(order=(sort_attr, 'desc'), limit=30)
    elif not searching:
        # Default listing only when neither search nor sort was requested;
        # previously a search-only request fell through to this branch and
        # had its results and title overwritten.
        page.title = 'Questions - Meno'
        questions_list = Question.get_latest(30)
    for question in questions_list:
        edit = question.latest_edit()[0]
        user = User.get_by_id(question.user_id)[0]
        age = question.age()
        stat = question.latest_status()[0]
        question_data = {
                'question_id': str(question.id),
                'user_id': str(question.user_id),
                'views': str(question.views),
                'votes': str(question.votes),
                'date_created': str(question.created),
                'category': str(Category.get_by_id(question.category_id)[0].name),
                'answers_count': str(count(question.answers())),
                'title': str(edit.title),
                'user': str(user.name),
                'status': str(stat.status),
                # NOTE(review): age[0] is used for both hours and minutes;
                # possibly meant (age[0], age[1], age[2]) - confirm what
                # question.age() returns before changing.
                'age': str("Asked %sh %sm %ss ago" % (age[0], age[0], age[1])),
                }
        page_data.append(question_data)

    content = QuestionsList(page_data)

    # Remember this page as the last one visited in the session.
    local.request.session['last'] = request.base_url
    return respond(page.render(content))
Exemplo n.º 34
0
def update_graph(results_dump):
    """Build the bar-chart figure of per-month counts from a JSON results dump.

    Args:
        results_dump: JSON string with ``start_year``, ``end_year`` and
            ``data``; ``None`` aborts the update.

    Returns:
        A figure dict with one bar trace: one x entry per month between
        ``start_year`` and ``end_year`` inclusive, and 0 for months without
        a count.

    Raises:
        PreventUpdate: if ``results_dump`` is None.
    """
    # Update conditions
    if results_dump is None:
        raise PreventUpdate
    results = json.loads(results_dump)

    # compute ticks: every (year, month) pair in the requested range
    first_year = results["start_year"]
    last_year = results["end_year"]
    months = []
    for year in range(first_year, last_year + 1):
        for month in range(1, 12 + 1):
            months.append((year, month))

    # compute values
    # as map so we add zeros where they miss (todo: could be done with pandas probably)
    counts_by_month = {}
    data = results["data"]
    if (len(data)):
        df = pd.DataFrame(data)
        utils.split_date(df)
        per_month_count = utils.count(utils.per_month(df))
        for year_month in per_month_count.index:
            counts_by_month[year_month] = per_month_count["count"].loc[year_month]

    y = [counts_by_month.get(year_month, 0) for year_month in months]
    # TODO: whats the correct value?
    x = ["{}-{}-01".format(year, month) for (year, month) in months]

    trace = {
        "x": x,
        "y": y,
        "type": "bar",
    }
    layout = {
        "xaxis": {
            "tickformat": "%Y/%m"
        }
    }
    return {"data": [trace], "layout": layout}
Exemplo n.º 35
0
    def build_tree(cls, dataset, func):
        """Recursively build a DecisionTree for ``dataset``.

        Every (feature, value) pair present in the data is tried as a split
        via ``cls._divide_set``; the split with the highest positive gain of
        ``func`` (the impurity measure) whose two sides are both non-empty is
        kept. The last column of each row is not used as a feature.

        Args:
            dataset: sequence of rows.
            func: impurity function applied to a dataset.

        Returns:
            An empty ``DecisionTree`` for an empty dataset, a leaf when no
            split improves the score, otherwise an inner node with both
            branches built recursively. Every non-empty node carries the most
            frequent result, the result counts and the count of all other
            results as ``error``.
        """
        if len(dataset) == 0:
            return DecisionTree()

        cur_score = func(dataset)
        feature_cnt = len(dataset[0]) - 1

        results = utils.count(dataset)
        by_frequency = sorted(results.items(), key=lambda item: item[1],
                              reverse=True)
        result = by_frequency[0][0]
        error = sum(n_rows for label, n_rows in results.items()
                    if label != result)

        # Choose the best feature
        best_gain, best_feature, best_split = 0.0, None, None
        for i in range(feature_cnt):
            for v in set([data[i] for data in dataset]):
                true_set, false_set = cls._divide_set(dataset, i, v)

                p_true = float(len(true_set)) / len(dataset)
                p_false = 1 - p_true
                gain = cur_score - p_true * \
                    func(true_set) - p_false * func(false_set)

                if gain > best_gain and len(true_set) and len(false_set):
                    best_gain = gain
                    best_feature = (i, v)
                    best_split = (true_set, false_set)

        if not best_gain:
            return DecisionTree(result=result, results=results, error=error)

        feature, value = best_feature
        true_rows, false_rows = best_split
        true_branch = cls.build_tree(true_rows, func)
        false_branch = cls.build_tree(false_rows, func)
        return DecisionTree(feature=feature, value=value,
                            true_branch=true_branch, false_branch=false_branch,
                            result=result, results=results, error=error)
Exemplo n.º 36
0
def _hello_world():
    """Return recommendations for a POSTed item, or a greeting otherwise.

    A POST must carry the form fields ``id``, ``linkdata`` and ``linktitle``.
    The model rows are fetched with ``data.requre``, similarities computed
    with ``utils.count`` and the recommendation for the requested title is
    returned as JSON. Any failure while doing so yields the string "Error".
    """
    if request.method != 'POST':
        return "Hello, Flask!"

    item_id = request.form["id"]
    linkdata = request.form["linkdata"]
    linktitle = request.form["linktitle"]
    try:
        model = data.requre(linkdata, item_id)
        if len(model) == 0:
            # Nothing to recommend from. The original reached the same
            # "Error" response indirectly, through a NameError.
            return "Error"
        count_matrix, cosine_sim = utils.count(model)
        indices = [model[i]['Title'] for i in range(len(model))]
        title = data.title(linktitle, item_id)
        result = utils.recommend(title, cosine_sim, indices, model)
        return jsonify(result)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return "Error"
Exemplo n.º 37
0
def test_emb(data):
    """Build one embedding vector per recipe from stored ingredient embeddings.

    Each ingredient embedding is weighted by ``a / (a + n)`` where ``n`` is
    the ingredient's entry in the table returned by
    ``count(MAX_VOCAB_SIZE, data)``; the weighted vectors of a recipe are then
    averaged.  Ingredients that have no stored embedding or no usable count
    are skipped.

    Args:
        data: mapping from recipe key to an iterable of ingredients.

    Returns:
        dict mapping each recipe key to a vector of length ``EMBEDDING_SIZE``.
        A recipe with no usable ingredient keeps the all-zero vector (the
        previous code divided by zero there and produced NaNs).
    """
    a = 0.01
    data_emb = {}
    # The file is read with allow_pickle and accessed through .item().get(),
    # i.e. it is expected to hold a mapping wrapped in an array; unwrap once.
    ingredient_emb = np.load("ingredient_emb.npy", allow_pickle=True).item()
    # Ingredient table: key is the ingredient, value is its occurrence count.
    ingredient_dict = count(MAX_VOCAB_SIZE, data)
    for key in data:
        recipe_emb = {}
        for ingredient in data[key]:
            try:
                emb = ingredient_emb.get(ingredient) * a
                recipe_emb[ingredient] = emb / (
                    a + int(ingredient_dict[ingredient]))
            except (TypeError, KeyError, ValueError):
                # TypeError: no embedding stored (``None * a``);
                # KeyError/ValueError: missing or non-numeric count.
                continue
        recipe = np.zeros(EMBEDDING_SIZE)
        for ingredient in recipe_emb:
            recipe = recipe + recipe_emb[ingredient]
        if recipe_emb:
            recipe = recipe / len(recipe_emb)
        data_emb[key] = recipe
    # np.save("recipe_emb_test.npy", data_emb)  # save the generated vectors to a dictionary file
    return data_emb
Exemplo n.º 38
0
    def _sample(self, iterable, func, print_progress=True, progress_delta=5, max_retries=0, retry_delay=0.05):
        """Call ``func`` on a random subset of ``iterable``, retrying failures.

        An element is processed when ``random.random()`` falls below
        ``self.options.sampleSetRatio``.  While processing,
        ``self.options.noop`` is set to ``True`` as long as
        ``retries < max_retries`` (or if it was already set) and is restored
        after every attempt.

        Args:
            iterable: elements to sample; its size is taken from ``len()``,
                then ``.count()``, then ``utils.count`` as a last resort.
            func: callable invoked with each selected element.
            print_progress: log a status line roughly every
                ``progress_delta`` percent of the elements.
            progress_delta: progress step in percent.
            max_retries: number of extra attempts after a failure.
            retry_delay: initial sleep between attempts, doubled after each
                failure.  NOTE(review): the doubled delay carries over to
                later elements -- confirm whether that is intended.
        """
        # Explicit floor division keeps the Python 2 integer semantics.
        progress_count = 100 // progress_delta
        ratio = self.options.sampleSetRatio
        index = 0

        try:
            count = len(iterable)
        except Exception:
            try:
                count = iterable.count()
            except Exception:
                count = utils.count(iterable)

        for obj in iterable:
            if print_progress and (count < progress_count or 0 == (index % (count // progress_count))):
                utils.log("%s : %s" % (self.__class__.__name__, utils.getStatusStr(index, count)))

            if random.random() < ratio:
                noop = self.options.noop
                retries = 0

                while True:
                    try:
                        self.options.noop = (retries < max_retries) or noop
                        func(obj)
                        break
                    except Exception as e:
                        utils.printException()
                        retries += 1

                        if noop or retries > max_retries:
                            prefix = "ERROR" if noop else "UNRESOLVABLE ERROR"
                            utils.log("%s: %s" % (prefix, str(e)))
                            break

                        time.sleep(retry_delay)
                        retry_delay *= 2
                    finally:
                        # Always restore the caller's noop setting.
                        self.options.noop = noop

            # The counter was never advanced before, so the progress check
            # always saw index 0 and logged on every element.
            index += 1
Exemplo n.º 39
0
def random_walk(board, start, trail, turn_func, end_func, orth, length=None):
    """Trace a random walk on ``board`` and return the final position.

    ``turn_func`` and ``end_func`` are called with the step index; when either
    is given as a float it is wrapped with ``prob``.  A truthy ``length``
    replaces ``end_func`` with "step index greater than ``length``".  At each
    step the cells at every offset returned by ``available_steps`` are set to
    ``.1``, then the walker moves and writes ``trail`` into its new cell.

    ``start`` is advanced in place and is the object that gets returned.
    """
    if isinstance(turn_func, float):
        turn_func = prob(turn_func)

    if length:
        def end_func(step_index):
            return step_index > length
    elif isinstance(end_func, float):
        end_func = prob(end_func)

    here = start
    # MARK START ON THE BOARD
    heading = random_step(board, here, orth)

    for tick in count():
        options = available_steps(board, here, orth)
        if not options:
            break

        # Dirty the surrounding cells so that strokes do not form side by side
        for delta in options:
            board[here[0] + delta[0]][here[1] + delta[1]] = .1

        # Stop if the walk bumps into something
        if heading not in options:
            break

        here[0] += heading[0]
        here[1] += heading[1]
        board[here[0]][here[1]] = trail

        if end_func(tick):
            break

        if turn_func(tick):
            heading = random_step(board, here, orth)

    return here
Exemplo n.º 40
0
 def _is_survivor(self, cell, neighbors_and_me):
   """Tell whether a live ``cell`` stays alive.

   ``neighbors_and_me`` is counted with ``count`` and one is subtracted from
   the total; the result must lie strictly between ``EXTINCTION_THRESHOLD``
   and ``OVERPOPULATION_THRESHOLD``.
   """
   if cell not in self.live_cells:
     return False
   others = count(neighbors_and_me) - 1
   return EXTINCTION_THRESHOLD < others < OVERPOPULATION_THRESHOLD
Exemplo n.º 41
0
    else:
        logloss += log(1-post)

for i in range(10000):
    net=struct()
#    net.a = (random()*.4)+.4
#    net.b_given_a=[random()*.6+.2]
#    net.b_given_a.append((net.b_given_a[0]+(random()*.4+.4))%1)
#    c_given_b=[random()*.6+.2]
#    c_given_b.append((c_given_b[0]+(random()*.4+.4))%1)
    net.a = random()
    net.b_given_a = [random(), random()]
    c_given_b = [random(), random()]
    net.c_given_ab = [c_given_b] * 2
    data = simulate(net, 100)
    cnt = count(zip(*data))
    if any(cnt, lambda(x):x==0):
        continue
    try:
#        print "--- %s ---" % net
#        sev = severs(data[0], data[1], data[2])
#        record(sev, False)
        #        if sev>10:
        #            print net
#        print "Severs:"
#        sev = severs(data[0], data[1], data[2])
#        record(sev, False)
#        sev = severs(data[1], data[2], data[0])
#        record(sev, False)
       sev = severs(data[0], data[2], data[1])
       record(sev, True)
Exemplo n.º 42
0
def p_given(a,b):
    """Return, for each row of ``count(zip(b, a))``, entry 1 divided by the row sum."""
    table = count(zip(b, a))
    fractions = []
    for row in table:
        fractions.append(float(row[1]) / sum(row))
    return fractions
Exemplo n.º 43
0
#!/usr/bin/python

from simulate_causal_net import create_big_net, simulate_big_net
from scipy.stats import chi2_contingency
from utils import count
from trio_test import has_common_cause

# Size handed to create_big_net; also the index range of ``net`` and ``data``.
vs = 6

net = create_big_net(vs, 3)
data = simulate_big_net(net, 1000)

# For every non-zero entry of ``net``, print its value and the chi-square
# p-value of the corresponding pair of data rows.
for i in range(vs):
    for j in range(vs):
        if not net[i][j]:
            continue
        cnt = count(zip(data[i], data[j]))
        p = chi2_contingency(cnt)[1]
        print('%d->%d %.2f p<%.2f' % (i, j, net[i][j], p))

# For every distinct pair with a zero entry in ``net``, report the pair when
# the test is nevertheless significant at the 5% level.
for i in range(vs):
    for j in range(vs):
        if i == j or net[i][j]:
            continue
        cnt = count(zip(data[i], data[j]))
        p = chi2_contingency(cnt)[1]
        if p < .05:
            print('%d,%d correlate' % (i, j))

            
for i in range(vs):
    for j in range(i):
        for k in range(j):
Exemplo n.º 44
0
 def testCount(self):
     """Check the 'Yes' and 'No' tallies of the 'playtennis' attribute."""
     yes_total, no_total = [utils.count('playtennis', label, self.dataset)
                            for label in ('Yes', 'No')]
     assert yes_total == 9
     assert no_total == 5
Exemplo n.º 45
0
 def _processItems(self, items):
     """Log how many items were received, then delegate to ``AEntityProxy``."""
     item_total = utils.count(items)
     message = "[%s] processing %d items" % (self, item_total)
     utils.log(message)
     AEntityProxy._processItems(self, items)
Exemplo n.º 46
0
def _record_term(ngram, table, term, user, status_id):
    """Update ``ngram[term]`` for ``user`` via ``utils.count`` and add ``status_id`` to ``table[term]``."""
    utils.count(ngram.setdefault(term, {}), user)
    table.setdefault(term, set()).add(status_id)


def feature(t_begin, t_end, screen_names):
    """Collect n-gram, hashtag and URL features from the users' items.

    For every screen name, the items created strictly between ``t_begin`` and
    ``t_end`` are fetched from ``db``.  When an item replies to another one,
    the text of the replied-to item(s) is appended before n-gram extraction.

    Args:
        t_begin: lower bound for ``created_at`` (exclusive, ``$gt``).
        t_end: upper bound for ``created_at`` (exclusive, ``$lt``).
        screen_names: sequence of screen names to query.

    Returns:
        ``(ngram, table)`` where ``ngram`` maps each term to a dict updated by
        ``utils.count(dict, screen_name)`` and ``table`` maps each term to the
        set of item ids it was seen in.
    """
    ngram = {}
    table = {}

    for j, u in enumerate(screen_names):
        query = { 'created_at': { '$gt': t_begin, '$lt': t_end }, 'screen_name': u }
        for item in db.find(query):
            text = item['text']
            status_id = item['id']

            try:
                replied_id = item['in_reply_to_status_id']
                if replied_id:
                    for ii in db.find({ 'id': replied_id }):
                        text += u'。%s' % ii['text']
            except KeyError:
                # Item (or the replied-to item) lacks the expected field.
                pass

            for w in set(extractd.getngram(text)):
                uw = unicode(w)
                # Skip terms shorter than two characters.
                if len(uw) < 2:
                    continue
                # Skip when the first hiragana match covers the whole term.
                if len(patterns.hiragana.findall(uw)[0]) == len(uw):
                    continue
                if w in patterns.english_words:
                    continue
                _record_term(ngram, table, w, u, status_id)

            for t in set(extractd.gethashtags(item)):
                _record_term(ngram, table, t, u, status_id)

            for l in set(extractd.geturls(item)):
                _record_term(ngram, table, l, u, status_id)

        print('%d/%d' % (j, len(screen_names)))
    return ngram, table
Exemplo n.º 47
0
# Scale applied to the posterior before rounding it into a bucket index.
cats = 20


def record(sev, truth):
    """Tally one observation in the module-level ``tot`` and ``hit`` tables.

    ``sev`` is turned into a posterior ``1 / (1 + 1/sev)``, which is scaled by
    ``cats`` and rounded to select the bucket.  ``tot`` is always incremented
    for that bucket; ``hit`` only when ``truth`` is truthy.
    """
    posterior = 1.0 / (1 + 1/sev) # prior=1/2
    bucket = round(posterior * cats)
    tot[bucket] += 1
    if not truth:
        return
    hit[bucket] += 1

# Simulate a random network and record the common-cause score for two
# variable triples: data[0:3] is recorded as False, data[1:4] as True.
for i in range(1):
    net = struct()
    net.a = random()
    net.b_given_a = [random(), random()]
    c_given_b = [random(), random()]
    net.c_given_ab = [c_given_b] * 2
    net.d_given_a = [random(), random()]
    data = simulate(net, 1000)
    cnt = count(zip(*data))
    # ``any`` is called with (collection, predicate) here, unlike the builtin.
    if any(cnt, lambda x: x == 0):
        print('net: %s' % net)
        print('skipping %s' % cnt)
        continue
    bf = has_common_cause_bf(data[0:3])
    record(bf, False)
    bf = has_common_cause_bf(data[1:4])
    record(bf, True)

# Per bucket: bucket position, fraction of hits, and number of entries.
for i in range(cats+1):
    # A conditional expression is required here: the old ``a and b or -1``
    # form printed -1 for a bucket with entries but zero hits, because a
    # 0.0 ratio is falsy.  -1 now only marks buckets with no entries.
    rate = float(hit[i]) / tot[i] if tot[i] else -1
    print('%f: %f n= %d' % (i/float(cats), rate, tot[i]))
Exemplo n.º 48
0
def test_count():
    """Load the sample image and print what ``utils.count`` returns for it."""
    import cv2
    import matplotlib.pyplot as plt
    from utils import count

    image = cv2.imread("data/result/1.jpg")
    tally = count(image)
    print(tally)
Exemplo n.º 49
0
def num_legal_values(csp, var, assignment):
    """Return how many values remain available for ``var``.

    When ``csp.curr_domains`` is truthy its entry for ``var`` is measured
    directly; otherwise the values in ``csp.domains[var]`` for which
    ``csp.nconflicts`` reports zero conflicts are counted.
    """
    if not csp.curr_domains:
        conflict_free = (csp.nconflicts(var, candidate, assignment) == 0
                         for candidate in csp.domains[var])
        return count(conflict_free)
    return len(csp.curr_domains[var])
Exemplo n.º 50
0
 def _is_newborn(self, cell, neighbors):
   """Tell whether a currently dead ``cell`` comes alive.

   True only when ``cell`` is not in ``self.live_cells`` and
   ``count(neighbors)`` equals ``REPRODUCTION_CONDITION``.
   """
   if cell in self.live_cells:
     return False
   return count(neighbors) == REPRODUCTION_CONDITION
Exemplo n.º 51
0
import json

import utils

if __name__ == '__main__':
    # Gather the 'maxSeg' value of every result of every instance in every
    # group, then plot their distribution as a bar chart.
    cost = []
    for group in utils.listGroups():
        for fn in utils.listGroupInstances(group):
            print(fn)
            topology = fn.split('.')[0]
            data = utils.read_data(group, 'edgeDisjointPaths', topology)
            assert data is not None, topology
            cost.extend(r['maxSeg'] for r in data['results'])
    cost = utils.count(cost)
    cost = utils.dict_to_bar(cost)
    cost = utils.array_to_percent(cost)
    # Combined value of all entries from index 6 upward.
    s = sum(cost[i] for i in range(6, len(cost)))
    print(s)
    groups = [str(i) for i in range(len(cost))]
    utils.make_g_barplot([cost], groups, ['seg cost'], ['#3CAEA3'],
                         'segment cost', 'percentage of topologies', '',
                         '../data/plot/minCostEDP_segcost.eps', 5)
"""
  ax = plt.subplot()
  plot(x, y)
  plt.xlabel("Topology size |G|")
  plt.ylabel("Runtime in seconds")
Exemplo n.º 52
0
# Entropy of each species' values after bucketing them by int(log10(x)).
for species in data.bacteria:
    vals = data.get_data(pl, 'fractions', species, bucketizer=lambda x: int(log(x, 10)))
    ent[species] = utils.entropy(vals)

# Species ordered from highest to lowest entropy.  sorted() replaces the
# keys()/.sort() pair, which only works where keys() returns a list.
spe = sorted(ent.keys(), key=lambda x: ent[x], reverse=True)
species_by_entropy = spe

# Chi-square p-value of ``sick`` against the thresholded values, per species.
pvs = {}
for species in data.bacteria:
    vals = data.get_data(pl, 'fractions', species)
    co = utils.findcutoff(vals, sick)
    if co.sick_when_more is None:
        continue
    boolvals = [(i > co.threshold) == (co.sick_when_more) for i in vals]
    cnts = utils.count(zip(sick, boolvals))
    try:
        pvs[species] = chi2_contingency(cnts)[1]
    except ValueError:
        # chi2_contingency rejected this table; leave the species out.
        pass

spe = sorted(pvs.keys(), key=lambda x: pvs[x])

# Keep entropy order, restricted to species with p < .1.
filt2 = [x for x in species_by_entropy if x in pvs and pvs[x] < .1]
print(len(species_by_entropy))
print(len(pvs))
print(len(ent))
print(len(filt2))
# Every third entry among the first 20; slicing tolerates a shorter list
# where indexing used to raise IndexError.
examples = filt2[0:20:3]
Exemplo n.º 53
0
#

import utils
import manip
import enrich

import numpy as np
import pandas as pd

# Sample document used by every step of this demo.
SAMPLE_FILE = "data/SampleTextFile_1000kb.txt"

# Run the file through block_operator with print as the wrapped function.
utils.block_operator(SAMPLE_FILE, utils.do_func(print))

print("\n")

print("Counting words in file:")
for heading, unit in (("Words: ", "word"), ("Lines: ", "line")):
    print(heading)
    print(utils.count(SAMPLE_FILE, unit))
print("All Characters in document: ")
print(utils.count(SAMPLE_FILE))

print("\n")
print("Use a regex to count the number of white spaces in the file:")
whitespace_total = manip.count_matches(SAMPLE_FILE, r'\s')
print(whitespace_total)
print("Count the number of times 'er' appears at the end of a word:")
er_total = manip.count_matches(SAMPLE_FILE, r'er\b')
print(er_total)

print("\n")
print("Substitute all instances of 'er' at a word-boundary with 'as'")
manip.sub_and_write(SAMPLE_FILE, r'er\b', "as")
Exemplo n.º 54
0
def num_legal_values(csp, var, assignment, forward_checking):
    """Return how many values remain available for ``var``.

    With ``forward_checking`` enabled and a truthy ``csp.curr_domains``, the
    size of ``csp.curr_domains[var]`` is returned.  Otherwise the values in
    ``csp.domains[var]`` for which ``csp.nconflicts`` reports zero conflicts
    are counted.
    """
    use_pruned_domains = csp.curr_domains and forward_checking
    if use_pruned_domains:
        return len(csp.curr_domains[var])
    conflict_free = (csp.nconflicts(var, candidate, assignment) == 0
                     for candidate in csp.domains[var])
    return count(conflict_free)
Exemplo n.º 55
0
print 'loading words...'
words = [line.strip().upper() for line in open('TWL06.txt')]
print 'done.'


'''
Looks like this expects you to tell it who owns each tile, after loading the board.
TODO(durandal): document this?
'''
import sys
board = None
while True:
  print '$ ',
  line = sys.stdin.readline()
  if not line: break
  line = line.strip()
  if line[0] is '`':
    board = open('games/%s.txt' % line[1:]).readline().strip().upper()
    print '"%s"' % board
    continue
  available = count_fancy(board,line)
  for k,v in available.iteritems():
    available[k].sort()
    available[k].reverse()
  print available
  legal = [(score_play(word,available), word) for word in words if is_subset(count(word), available)]
  if len(legal) == 0: print 'no legal moves'
  for score,word in sorted(legal):
    print score,word