Exemplo n.º 1
0
def get_score_median_95(y, y_train_temp):
    """Score every user against the median and 95th-percentile URL ratios.

    Each row of ``y_train_temp`` contributes one entry keyed by
    ``int(float(row[0]))`` whose value is ``float(row[1]) / float(row[2])``
    (presumably a click-through rate, going by the variable names -- confirm
    against the caller).  Only URLs present in that table are counted for a
    user; URLs outside the training rows are ignored.

    Args:
        y: Unused by this function; kept for interface compatibility.
        y_train_temp: Iterable of indexable rows; indices 0, 1 and 2 are read.

    Returns:
        A two-element list ``[user_score_median, user_score_95]``; each is a
        dict mapping a user to the value returned by ``utils.calc_score``.

    Note:
        Reads the module-level mapping ``users_posting_urls`` (user -> URLs).
    """
    # Ratio per training URL; the value is computed before the key, matching
    # the evaluation order of a plain subscript assignment.
    ratio_by_url = {}
    for row in y_train_temp:
        ratio = float(row[1]) / float(row[2])
        ratio_by_url[int(float(row[0]))] = ratio

    median_threshold = utils.get_percentile_url(ratio_by_url, 50)
    p95_threshold = utils.get_percentile_url(ratio_by_url, 95)

    median_scores = {}
    p95_scores = {}
    for user in users_posting_urls:
        posted = users_posting_urls[user]
        # Keep only the ratios of URLs that appear in the training table.
        known_ratios = [
            ratio_by_url[int(url)] for url in posted if int(url) in ratio_by_url
        ]
        known_count = len(known_ratios)

        above_median = sum(1 for r in known_ratios if r > median_threshold)
        median_scores[user] = utils.calc_score(0.5, above_median, known_count)

        above_p95 = sum(1 for r in known_ratios if r > p95_threshold)
        p95_scores[user] = utils.calc_score(0.05, above_p95, known_count)

    return [median_scores, p95_scores]
Exemplo n.º 2
0
def gen_individual(accept, reject):
    """Generate a fresh root node whose score is not the -1000 sentinel.

    New ``node.Node(depth=0, root=True)`` instances are created and scored
    with ``utils.calc_score`` until one scores something other than -1000.

    Args:
        accept: Forwarded unchanged to ``utils.calc_score``.
        reject: Forwarded unchanged to ``utils.calc_score``.

    Returns:
        A two-element list ``[individual, score]``.
    """
    while True:
        candidate = node.Node(depth=0, root=True)
        fitness = utils.calc_score(candidate, accept, reject)

        # -1000 is treated as "discard and try again".
        if fitness != -1000:
            return [candidate, fitness]
Exemplo n.º 3
0
def spam_score(preds_file, vlt_file, threshold=0.5):
    """Binarise predictions at ``threshold`` and score them against ``vlts``.

    Args:
        preds_file: Passed to ``readpred_vw`` together with the module-level
            ``loss_func``.
        vlt_file: Passed to ``read_vlt``.
        threshold: A prediction row becomes 1 when its element at index 1 is
            ``>= threshold``, otherwise 0.

    Returns:
        Whatever ``calc_score(vlts, preds)`` returns.

    Raises:
        ZeroDivisionError: If either the predictions or ``vlts`` are empty
            (the logged ratios divide by their lengths).
    """
    vlts = read_vlt(vlt_file)

    binary_preds = []
    for row in readpred_vw(preds_file, loss_func):
        binary_preds.append(1 if row[1] >= threshold else 0)

    pred_pos_ratio = float(sum(binary_preds)) / len(binary_preds)
    vlt_count = len(vlts)
    vlt_pos_ratio = sum(vlts) / float(len(vlts))
    logging.info('threshold: %f, pos ratio of pred: %f, vlts #: %d, pos ratio of vlts: %f' %
                 (threshold, pred_pos_ratio, vlt_count, vlt_pos_ratio))

    return calc_score(vlts, binary_preds)
Exemplo n.º 4
0
    def validate_score(self):
        """Evaluate the network on ``self.test_loader`` and record the score.

        Puts ``self.net`` in eval mode, collects ``self.net.predict`` outputs
        for every batch, computes MAP/MRR through ``utils.calc_score`` and
        logs the summarised score via ``self.summary.add_score``.

        Returns:
            True when the current MAP is at least ``self.last_map`` (which is
            then updated to the current MAP); False otherwise.
        """
        self.net.eval()
        predictions = []
        for batch_q, batch_a, _ in self.test_loader:
            batch_q = batch_q.detach()
            batch_a = batch_a.detach()
            if config.use_cuda:
                batch_q = batch_q.cuda()
                batch_a = batch_a.cuda()
            logits = self.net(batch_q, batch_a).flatten()
            predictions.extend(self.net.predict(logits).tolist())

        curr_map, curr_mrr = utils.calc_score(predictions)
        summarized = utils.summarize_score(curr_map, curr_mrr)
        self.summary.add_score(self.epoch, self.step, summarized)

        # Ties count as "not worse", so last_map is refreshed on equality too.
        if not curr_map >= self.last_map:
            return False
        self.last_map = curr_map
        return True
Exemplo n.º 5
0
                # Do this only if the target_node wasn't the first node
                # infected in the cascade.
                # If it was the first node (else branch), then we cannot deduce
                # anything about the incoming edges.
                # NOTE(review): presumably each entry of c is a
                # (infection_time, node) pair -- confirm against where c is built.
                log_sum = 0
                for j in range(len(c)):
                    t_j = c[j][0]
                    alpha_ji = Ai[c[j][1]]

                    # Only entries with a time strictly before t_i are meant
                    # to contribute.
                    if t_j < t_i:
                        # TODO
                        # expr += ...
                        # log_sum += ...
                        pass

                # NOTE(review): until the TODO above is implemented, log_sum is
                # still the constant 0 here, so this adds CVX.log(0) to expr.
                expr += CVX.log(log_sum)

    # Maximise the accumulated expression subject to the constraints, and keep
    # both the problem object and the value returned by solve().
    prob = CVX.Problem(CVX.Maximize(expr), constraints)
    res = prob.solve(verbose=True)
    probs.append(prob)
    results.append(res)
    # On an (inaccurately) optimal status, copy the solved Ai values into
    # column target_node of A; otherwise mark the whole column with -1.
    if prob.status in [CVX.OPTIMAL, CVX.OPTIMAL_INACCURATE]:
        A[:, target_node] = np.asarray(Ai.value).squeeze()
    else:
        A[:, target_node] = -1


# Load the comma-separated matrix stored in solution.csv and print the result
# of comparing it with A via U.calc_score.
A_soln = np.loadtxt('solution.csv', delimiter=',')

print(U.calc_score(A, A_soln))
Exemplo n.º 6
0
# Split the data set into train and test parts in a 9:1 ratio
# (test_size=0.1 holds out 10% of the samples for testing).
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y_binarized,
                                                    test_size=0.1)

# Create a decision tree classifier instance
clf = DecisionTreeClassifier()

# Fit the classifier on the train part
clf_fit = clf.fit(X_train, y_train)

# Predict the values for the train part
y_predicted_train = clf.predict(X_train)

# Compute the score from the actual and predicted y of the train part
score = utils.calc_score(y_train, y_predicted_train)
print(score)

# Predict the values for the test part
y_predicted_test = clf.predict(X_test)

# Compute the score from the actual and predicted y of the test part
score = utils.calc_score(y_test, y_predicted_test)
print(score)

# Visualise the decision tree built by the classifier and save it to a PDF file
utils.decision_tree_save(clf, iris, "decision_tree.pdf")

# Predict the target values for the test part using the previously fitted
# classifier (i.e. follow the decision tree).
# NOTE(review): this repeats the clf.predict(X_test) call made above.
y_score = clf.predict(X_test)
def evaluate_mica(orig_labels, pred_labels):
    """Print precision, recall and F1 for every class id in ``eval_list``.

    For each ``class_id`` of the module-level ``eval_list`` the three values
    returned by ``calc_score(orig_labels, pred_labels, class_id)`` are printed
    with three decimals, prefixed by the class id.

    Args:
        orig_labels: Forwarded unchanged to ``calc_score``.
        pred_labels: Forwarded unchanged to ``calc_score``.

    Returns:
        None; the report is written with ``print``.
    """
    report_template = ": prec = {:.3f} rec = {:.3f} f1 = {:.3f}"
    for class_id in eval_list:
        metrics = calc_score(orig_labels, pred_labels, class_id)
        precision, recall, f1_score = metrics
        # class_id is concatenated directly, so it has to be a string.
        print(class_id + report_template.format(precision, recall, f1_score))
Exemplo n.º 8
0
population = gen_population(1000, accept, reject)

bestest = gen_individual(accept, reject)
for i in range(100):
    best = ['fjkghdfkghdfkghsdkfhgksdfjhgksjdfhgkjsdhfgkjhsdfkghsdfkjghksdfhgkj', -9999]
    print("\nGeneration:", i)

    new_pop = []

    for _ in range(800):
        while True:
            try:
                m1 = utils.tournament_selection(population, 50)
                m2 = utils.tournament_selection(population, 50)
                child = utils.crossover(m1[0], m2[0])
                child = [child, utils.calc_score(child, accept, reject)]
                new_pop.append(child)
                break
            except:
                pass

    for _ in range(100):
        new_pop.append(gen_individual(accept, reject))

    for _ in range(100):
        while True:
            try:
                a = random.choice(population)
                b = utils.mutate(a[0])
                b = [b, utils.calc_score(b, accept, reject)]
                new_pop.append(b)
Exemplo n.º 9
0
            # NOTE(review): presumably each entry of c is a
            # (infection_time, node) pair, so c[0][0] is the earliest time in
            # the cascade -- confirm against where c is built.
            if c[0][0] != infection_time:
                # Do this only if the target_node wasn't the first node
                # infected in the cascade.
                # If it was the first node (else branch), then we cannot deduce
                # anything about the incoming edges.
                log_sum = 0
                for j in range(len(c)):
                    t_j = c[j][0]
                    alpha_ji = Ai[c[j][1]]

                    # Only entries with a time strictly before t_i are meant
                    # to contribute.
                    if t_j < t_i:
                        # TODO
                        # expr += ...
                        # log_sum += ...
                        pass

                # NOTE(review): until the TODO above is implemented, log_sum is
                # still the constant 0 here, so this adds CVX.log(0) to expr.
                expr += CVX.log(log_sum)

    # Maximise the accumulated expression subject to the constraints, and keep
    # both the problem object and the value returned by solve().
    prob = CVX.Problem(CVX.Maximize(expr), constraints)
    res = prob.solve(verbose=True)
    probs.append(prob)
    results.append(res)
    # On an (inaccurately) optimal status, copy the solved Ai values into
    # column target_node of A; otherwise mark the whole column with -1.
    if prob.status in [CVX.OPTIMAL, CVX.OPTIMAL_INACCURATE]:
        A[:, target_node] = np.asarray(Ai.value).squeeze()
    else:
        A[:, target_node] = -1

# Load the comma-separated matrix stored in solution.csv and print the result
# of comparing it with A via U.calc_score.
A_soln = np.loadtxt('solution.csv', delimiter=',')

print(U.calc_score(A, A_soln))