Exemplos de get_candidates em Python, exemplos de candidate.get_candidates em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: validation.py Projeto: FindBoat/Kaggle

def generate_training_set(follow, followed, ratio, solution_file, data_file):
    """Build a labelled training set from a solution file and write it out.

    For every node listed in the solution file, each of its solution targets
    yields a positive row ``[1] + features``; a random sample of the node's
    remaining candidates yields negative rows ``[0] + features``.

    Args:
        follow: Passed through to ``candidate.get_candidates`` and
            ``rank.get_features``.
        followed: Passed through to the same two helpers.
        ratio: Negatives sampled per positive for each node. ``-1`` keeps
            every negative candidate (the original proportion).
        solution_file: Read with ``utilities.read_file``; each row is a node
            id followed by the ids of its positive targets.
        data_file: Destination handed to ``utilities.write_file``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    # The first row is a fixed string row, kept byte-for-byte.
    # NOTE(review): presumably a header placeholder skipped by the reader of
    # data_file -- confirm before changing it.
    x_train = [['spring brother is a true man']]
    for node, nodes_pos in dict_solution.items():
        for pos in nodes_pos:
            features = rank.get_features(follow, followed, node, pos)
            x_train.append([1] + features)

        nodes_neg = candidate.get_candidates(follow, followed, node)
        nodes_neg.difference_update(nodes_pos)
        nodes_neg = list(nodes_neg)
        # NOTE(review): `random` must provide `permutation` (presumably
        # numpy.random) -- the stdlib `random` module does not.
        perm = random.permutation(len(nodes_neg))
        if ratio != -1:
            num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
        else:
            num = len(nodes_neg)
        for i in range(num):
            # Pair the source node with the sampled negative candidate. The
            # previous code overwrote `node` and reused the last positive
            # target, so the negative rows described the wrong pair.
            neg = nodes_neg[perm[i]]
            features = rank.get_features(follow, followed, node, neg)
            x_train.append([0] + features)

    utilities.write_file(data_file, x_train)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: validation.py Projeto: sb123456789sb/MachineLearning-50

def generate_training_set(follow, followed, ratio, solution_file, data_file):
    """Generate training rows from the solution file and save them.

    Each node in the solution file contributes one positive row
    ``[1] + features`` per solution target, and negative rows
    ``[0] + features`` for a random subset of its other candidates.

    Args:
        follow: Forwarded to ``candidate.get_candidates`` and
            ``rank.get_features``.
        followed: Forwarded to the same two helpers.
        ratio: How many negatives to sample per positive for each node;
            ``-1`` means use all negative candidates (original proportion).
        solution_file: Loaded via ``utilities.read_file``; every row holds a
            node id followed by its positive target ids.
        data_file: Output location given to ``utilities.write_file``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    # Leading fixed row is preserved exactly as written.
    # NOTE(review): looks like a dummy header line for the output file --
    # verify against whatever consumes data_file.
    x_train = [['spring brother is a true man']]
    for source, nodes_pos in dict_solution.items():
        for target in nodes_pos:
            features = rank.get_features(follow, followed, source, target)
            x_train.append([1] + features)

        nodes_neg = candidate.get_candidates(follow, followed, source)
        nodes_neg.difference_update(nodes_pos)
        nodes_neg = list(nodes_neg)
        # NOTE(review): relies on `random.permutation`, which the stdlib
        # `random` lacks -- presumably this is numpy.random; confirm.
        perm = random.permutation(len(nodes_neg))
        if ratio != -1:
            num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
        else:
            num = len(nodes_neg)
        for i in range(num):
            # Features must describe (source, negative candidate). The old
            # code rebound the source variable to the negative and passed the
            # leftover positive target as the second node.
            negative = nodes_neg[perm[i]]
            features = rank.get_features(follow, followed, source, negative)
            x_train.append([0] + features)

    utilities.write_file(data_file, x_train)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: validation.py Projeto: sb123456789sb/MachineLearning-50

def analyze_candidates(solution_file, follow, followed):
    """Print how many solution targets ``candidate.get_candidates`` misses.

    Args:
        solution_file: Read with ``utilities.read_file``; each row is a node
            id followed by the ids of its solution targets.
        follow: Forwarded to ``candidate.get_candidates``.
        followed: Forwarded to ``candidate.get_candidates``.

    Prints the total number of solution targets and the number of them that
    are absent from the corresponding candidate collection.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    count_total = 0
    count_miss = 0
    for node, targets in dict_solution.items():
        candidates = candidate.get_candidates(follow, followed, node)
        count_miss += sum(1 for n in targets if n not in candidates)
        count_total += len(targets)

    # Parenthesised single-argument form prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('count_total = %d, count_miss = %d' % (count_total, count_miss))

Exemplo n.º 4

0

Exibir arquivo

Arquivo: validation.py Projeto: FindBoat/Kaggle

def analyze_candidates(solution_file, follow, followed):
    """Measure the coverage of ``candidate.get_candidates`` on a solution file.

    Args:
        solution_file: Loaded via ``utilities.read_file``; every row holds a
            node id followed by its solution target ids.
        follow: Passed to ``candidate.get_candidates``.
        followed: Passed to ``candidate.get_candidates``.

    Prints two counters: the total number of solution targets, and how many
    of them do not appear among the candidates generated for their node.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    count_total = 0
    count_miss = 0
    for node, expected in dict_solution.items():
        candidates = candidate.get_candidates(follow, followed, node)
        missed = [n for n in expected if n not in candidates]
        count_miss += len(missed)
        count_total += len(expected)

    # A single parenthesised argument keeps the output identical on Python 2
    # while also being valid Python 3 syntax.
    print('count_total = %d, count_miss = %d' % (
        count_total, count_miss))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: main.py Projeto: FindBoat/Kaggle

def suggest_friends(follow, followed, clf, node, popular_people,
    max_suggestion):
    """Suggest up to ``max_suggestion`` friends for ``node``.

    Args:
        follow: Mapping checked for ``node``; also forwarded to
            ``candidate.get_candidates`` and ``rank.rank_candidates``.
        followed: Forwarded to the same two helpers.
        clf: Forwarded to ``rank.rank_candidates``.
        node: The node to produce suggestions for.
        popular_people: Iterable of fallback suggestions, used in order.
        max_suggestion: Maximum number of suggestions to return.

    Returns:
        An empty list when ``node`` is not in ``follow``. Otherwise the
        ranked candidates, truncated to ``max_suggestion`` or padded with
        entries from ``popular_people`` that are not already suggested.
    """
    # `in` replaces dict.has_key, which no longer exists in Python 3.
    if node not in follow:
        return []

    candidates = candidate.get_candidates(follow, followed, node)
    suggested = rank.rank_candidates(follow, followed, clf, node, candidates)

    if len(suggested) >= max_suggestion:
        return suggested[:max_suggestion]

    # Too few ranked candidates: pad with popular people until the limit
    # is reached or the fallback list runs out.
    for star in popular_people:
        if star not in suggested:
            suggested.append(star)
        if len(suggested) >= max_suggestion:
            break

    return suggested

Exemplo n.º 6

0

Exibir arquivo

Arquivo: main.py Projeto: sb123456789sb/MachineLearning-50

def suggest_friends(follow, followed, clf, node, popular_people,
                    max_suggestion):
    """Return at most ``max_suggestion`` friend suggestions for ``node``.

    Args:
        follow: Mapping that must contain ``node`` for any suggestion to be
            made; also passed to ``candidate.get_candidates`` and
            ``rank.rank_candidates``.
        followed: Passed to the same two helpers.
        clf: Passed to ``rank.rank_candidates``.
        node: The node that suggestions are generated for.
        popular_people: Ordered fallback entries used for padding.
        max_suggestion: Upper bound on the number of suggestions.

    Returns:
        ``[]`` if ``node`` is missing from ``follow``; otherwise the ranked
        candidates cut to ``max_suggestion``, or topped up from
        ``popular_people`` (skipping entries already present).
    """
    # Membership test instead of dict.has_key, which Python 3 removed.
    if node not in follow:
        return []

    candidates = candidate.get_candidates(follow, followed, node)
    suggested = rank.rank_candidates(follow, followed, clf, node, candidates)

    enough = len(suggested) >= max_suggestion
    if enough:
        suggested = suggested[:max_suggestion]
    else:
        # Fewer ranked candidates than max_suggestion: fill the remaining
        # slots with popular people, stopping once the limit is hit.
        for star in popular_people:
            if star not in suggested:
                suggested.append(star)
            if len(suggested) >= max_suggestion:
                break

    return suggested