Exemplo n.º 1
0
def cluster_users(clustering_tool, reader: DataReader,
                  clustered_users: List[User], clusters_number: int,
                  lists_count: int) -> Clusterings:
    """
    Build one clustering per list index and collect them in a Clusterings.

    For every list index in ``range(lists_count)`` (i.e. 0 .. lists_count - 1)
    a features matrix is built from all users returned by the reader, the
    clustering tool is run on it, and the resulting labels are converted to
    sets of users and stored under that list index.

    :param clustering_tool: object exposing ``fit_predict(features_matrix)``
        (the original docstring describes it as a scikit-learn clustering tool)
    :param reader: DataReader object the users are taken from
    :param clustered_users: users that are already clustered; forwarded to the
        matrix-building and label-conversion helpers, which are expected to
        leave them out of the result
    :param clusters_number: expected number of clusters, passed to Clusterings
    :param lists_count: number of lists to cluster by (indices start at 0)
    :return: Clusterings object holding one clustering per list index
    """
    result = Clusterings(clusters_number)
    candidates = reader.get_all_users()

    for list_index in range(lists_count):
        matrix = get_matrix_by_list_for_not_clustered_users(
            candidates, list_index, clustered_users)
        labels = clustering_tool.fit_predict(matrix)
        users_sets = convert_clusters_list_to_users_sets(
            reader, labels, clustered_users)
        result.add_clustering_for_list(users_sets, list_index)

    return result
Exemplo n.º 2
0
from experiments.experiment4.preferences_clustering import PreferencesClustering
from utils.data_reader import DataReader
from utils.json_serializer import Serializer
from utils.metrics import TeamDesiresMetric, ClusteringMetric

__author__ = 'Xomak'

# Load the users and run PreferencesClustering over them.
# NOTE(review): the second argument (6) is presumably the number of teams
# or the team size -- confirm against PreferencesClustering.
reader = DataReader("../data/ms-sne.json")
pc = PreferencesClustering(reader.get_all_users(), 6)
result = pc.clusterize()

# Report every resulting set: its members on one comma-separated line,
# followed by the TeamDesiresMetric object for that set.
for current_set in result:
    output = [str(user) for user in current_set]
    print(','.join(output))
    print(TeamDesiresMetric(current_set))

# Print the average desires metric over the whole clustering, then dump
# the result to the file read by the web visualiser.
print(ClusteringMetric(result).get_average_desires_metric())
Serializer.serialize_to_file(result, "../web-visualiser/data.json")

Exemplo n.º 3
0

# Experiment settings shared by every clustering run below.
desires_weight = 1  # forwarded to UsersSpectralClustering as desires_weight
need_balance = True  # forwarded to UsersSpectralClustering as need_balance
metric_type = MetricTypes.DESIRES  # metric kind handed to ClusteringMetric

# Data sets the experiment iterates over.
data_files_names = [
    "../data/ms-sne_names.json",
    "../data/eltech-vector.json",
    "../data/users.json",
]

# Collects the final metric for each (data file, teams count) pair.
results = ResultHolder(data_files_names)

for data_file_name in data_files_names:

    reader = DataReader(data_file_name)
    max_teams = int(len(reader.get_all_users()) / 2)
    for teams in range(2, max_teams + 1):

        print("\n\nTEAMS: %d\n" % teams)

        # Spectral
        clustering_alg = UsersSpectralClustering(reader,
                                                 teams,
                                                 desires_weight=desires_weight,
                                                 need_balance=need_balance)
        sets = clustering_alg.cluster()
        results.add_metric_for(
            data_file_name, teams,
            ClusteringMetric(sets, metric_type).get_final_metric())

        # Agglomerative