Exemplo n.º 1
0
def generate_training_set(follow, followed, ratio, solution_file, data_file):
    """Build a labelled training set from a solution file and write it out.

    Each row of the solution file is read as a node id followed by the ids
    that are the known positives for that node.  For every (node, positive)
    pair one row ``[1] + features`` is emitted.  Negatives are drawn at
    random from ``candidate.get_candidates`` for the node, excluding its
    positives, and emitted as ``[0] + features``.

    Args:
        follow: Passed through to ``candidate.get_candidates`` and
            ``rank.get_features`` (presumably the "who follows whom"
            mapping -- confirm against those helpers).
        followed: Passed through alongside ``follow``.
        ratio: Number of negatives kept per positive for each node.  ``-1``
            keeps every negative candidate, i.e. the original
            positive/negative proportion.
        solution_file: Path read with ``utilities.read_file``.
        data_file: Path the rows are written to with
            ``utilities.write_file``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    # The first row is a placeholder line kept from the existing output
    # format.  NOTE(review): presumably consumers skip it as a header.
    x_train = [['spring brother is a true man']]
    for node, nodes_pos in dict_solution.items():
        for pos in nodes_pos:
            features = rank.get_features(follow, followed, node, pos)
            x_train.append([1] + features)

        nodes_neg = candidate.get_candidates(follow, followed, node)
        nodes_neg.difference_update(nodes_pos)
        nodes_neg = list(nodes_neg)
        perm = random.permutation(len(nodes_neg))
        if ratio != -1:
            num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
        else:
            num = len(nodes_neg)
        for i in range(num):
            # Pair the source node with the sampled negative.  The previous
            # code rebound ``node`` to the negative and reused the leftover
            # loop variable of the positive loop, so negative rows described
            # the wrong pair (and failed when a node had no positives).
            neg = nodes_neg[perm[i]]
            features = rank.get_features(follow, followed, node, neg)
            x_train.append([0] + features)

    utilities.write_file(data_file, x_train)
def generate_training_set(follow, followed, ratio, solution_file, data_file):
    """Build a labelled training set from a solution file and write it out.

    Each row of the solution file is read as a node id followed by the ids
    that are the known positives for that node.  For every (node, positive)
    pair one row ``[1] + features`` is emitted.  Negatives are drawn at
    random from ``candidate.get_candidates`` for the node, excluding its
    positives, and emitted as ``[0] + features``.

    Args:
        follow: Passed through to ``candidate.get_candidates`` and
            ``rank.get_features`` (presumably the "who follows whom"
            mapping -- confirm against those helpers).
        followed: Passed through alongside ``follow``.
        ratio: Number of negatives kept per positive for each node.  ``-1``
            keeps every negative candidate, i.e. the original
            positive/negative proportion.
        solution_file: Path read with ``utilities.read_file``.
        data_file: Path the rows are written to with
            ``utilities.write_file``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    # The first row is a placeholder line kept from the existing output
    # format.  NOTE(review): presumably consumers skip it as a header.
    x_train = [['spring brother is a true man']]
    for node, nodes_pos in dict_solution.items():
        for pos in nodes_pos:
            features = rank.get_features(follow, followed, node, pos)
            x_train.append([1] + features)

        nodes_neg = candidate.get_candidates(follow, followed, node)
        nodes_neg.difference_update(nodes_pos)
        nodes_neg = list(nodes_neg)
        perm = random.permutation(len(nodes_neg))
        if ratio != -1:
            num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
        else:
            num = len(nodes_neg)
        for i in range(num):
            # Pair the source node with the sampled negative.  The previous
            # code rebound ``node`` to the negative and reused the leftover
            # loop variable of the positive loop, so negative rows described
            # the wrong pair (and failed when a node had no positives).
            neg = nodes_neg[perm[i]]
            features = rank.get_features(follow, followed, node, neg)
            x_train.append([0] + features)

    utilities.write_file(data_file, x_train)
def analyze_candidates(solution_file, follow, followed):
    """Report how many known positives ``get_candidates`` fails to propose.

    Reads the solution file (each row: a node id followed by its positive
    ids), asks ``candidate.get_candidates`` for every node, and prints the
    total number of positives together with how many of them were missing
    from the returned candidates.

    Args:
        solution_file: Path read with ``utilities.read_file``.
        follow: Passed through to ``candidate.get_candidates``.
        followed: Passed through to ``candidate.get_candidates``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    count_total = 0
    count_miss = 0
    for node, expected in dict_solution.items():
        candidates = candidate.get_candidates(follow, followed, node)
        count_miss += sum(1 for n in expected if n not in candidates)
        count_total += len(expected)

    # A parenthesised single argument prints the same text on Python 2 and
    # Python 3, whereas the bare ``print`` statement is a syntax error on 3.
    print('count_total = %d, count_miss = %d' % (count_total, count_miss))
Exemplo n.º 4
0
def analyze_candidates(solution_file, follow, followed):
    """Report how many known positives ``get_candidates`` fails to propose.

    Reads the solution file (each row: a node id followed by its positive
    ids), asks ``candidate.get_candidates`` for every node, and prints the
    total number of positives together with how many of them were missing
    from the returned candidates.

    Args:
        solution_file: Path read with ``utilities.read_file``.
        follow: Passed through to ``candidate.get_candidates``.
        followed: Passed through to ``candidate.get_candidates``.
    """
    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1:])

    count_total = 0
    count_miss = 0
    for node, expected in dict_solution.items():
        candidates = candidate.get_candidates(follow, followed, node)
        count_miss += sum(1 for n in expected if n not in candidates)
        count_total += len(expected)

    # A parenthesised single argument prints the same text on Python 2 and
    # Python 3, whereas the bare ``print`` statement is a syntax error on 3.
    print('count_total = %d, count_miss = %d' % (count_total, count_miss))
Exemplo n.º 5
0
def suggest_friends(follow, followed, clf, node, popular_people,
                    max_suggestion):
    """Return up to ``max_suggestion`` friend suggestions for ``node``.

    A node that is not a key of ``follow`` gets an empty list.  Otherwise
    the candidates from ``candidate.get_candidates`` are handed to
    ``rank.rank_candidates``.  If that result has fewer than
    ``max_suggestion`` entries it is topped up, in order, with entries of
    ``popular_people`` that are not already present; otherwise it is cut to
    its first ``max_suggestion`` entries.

    Args:
        follow: Container whose keys are the known nodes; also passed to
            the candidate and ranking helpers.
        followed: Passed through to the candidate and ranking helpers.
        clf: Passed through to ``rank.rank_candidates``.
        node: The node to produce suggestions for.
        popular_people: Iterable of fallback suggestions, tried in order.
        max_suggestion: Upper bound on the number of suggestions.

    Returns:
        A list of suggestions; it may be shorter than ``max_suggestion``
        when ``popular_people`` runs out.
    """
    # ``in`` replaces dict.has_key(), which was removed in Python 3.
    if node not in follow:
        return []

    candidates = candidate.get_candidates(follow, followed, node)
    suggested = rank.rank_candidates(follow, followed, clf, node, candidates)

    if len(suggested) >= max_suggestion:
        return suggested[:max_suggestion]

    # Fewer ranked candidates than max_suggestion: pad with popular people.
    for star in popular_people:
        if star not in suggested:
            suggested.append(star)
        if len(suggested) >= max_suggestion:
            break
    return suggested
Exemplo n.º 6
0
def suggest_friends(follow, followed, clf, node, popular_people,
                    max_suggestion):
    """Return up to ``max_suggestion`` friend suggestions for ``node``.

    A node that is not a key of ``follow`` gets an empty list.  Otherwise
    the candidates from ``candidate.get_candidates`` are handed to
    ``rank.rank_candidates``.  If that result has fewer than
    ``max_suggestion`` entries it is topped up, in order, with entries of
    ``popular_people`` that are not already present; otherwise it is cut to
    its first ``max_suggestion`` entries.

    Args:
        follow: Container whose keys are the known nodes; also passed to
            the candidate and ranking helpers.
        followed: Passed through to the candidate and ranking helpers.
        clf: Passed through to ``rank.rank_candidates``.
        node: The node to produce suggestions for.
        popular_people: Iterable of fallback suggestions, tried in order.
        max_suggestion: Upper bound on the number of suggestions.

    Returns:
        A list of suggestions; it may be shorter than ``max_suggestion``
        when ``popular_people`` runs out.
    """
    # ``in`` replaces dict.has_key(), which was removed in Python 3.
    if node not in follow:
        return []

    candidates = candidate.get_candidates(follow, followed, node)
    suggested = rank.rank_candidates(follow, followed, clf, node, candidates)

    if len(suggested) >= max_suggestion:
        return suggested[:max_suggestion]

    # Fewer ranked candidates than max_suggestion: pad with popular people.
    for star in popular_people:
        if star not in suggested:
            suggested.append(star)
        if len(suggested) >= max_suggestion:
            break
    return suggested