def instantiate_networkdata_class(dataset_domain, dataset_path, impl_type,
                                  max_core_nodes, cutoff_rating, store_dataset,
                                  interact_type_val, min_interacts_beforeaftersplit_per_user):
    """Create the data preparser for ``dataset_domain``, load it and show its stats.

    Args:
        dataset_domain: Name selecting the preparser class. Supported values
            are "twitter", "lastfm", "lastfm_simple", "lastfm_lovelisten",
            "goodreads", "flixster" and "flickr".
        dataset_path: Forwarded to every preparser.
        impl_type: Forwarded to every preparser.
        max_core_nodes: Forwarded to every preparser except the "twitter" one.
        cutoff_rating: Forwarded to every preparser except the "twitter" one.
        store_dataset: Forwarded to every preparser except the "twitter" one.
        interact_type_val: Forwarded only for "lastfm_simple" and
            "lastfm_lovelisten".
        min_interacts_beforeaftersplit_per_user: Doubled and passed on as
            ``min_interactions_per_user`` for every domain except "twitter"
            and "lastfm".

    Returns:
        The preparser instance, after ``get_all_data()`` has been called on it
        and ``BasicNetworkAnalyzer(data).show_basic_stats()`` has run.

    Raises:
        ValueError: If ``dataset_domain`` is not one of the supported names.
        Any exception raised while loading or analysing the data propagates
        unchanged.
    """
    # NOTE(review): the per-user minimum is doubled before being handed to the
    # preparsers, presumably because it must hold on each side of a
    # before/after split -- confirm against the preparser implementations.
    if dataset_domain == "twitter":
        data = HashtagDataPreparser(dataset_path, impl_type)
    elif dataset_domain == "lastfm":
        data = LastfmDataPreparserCSV(dataset_path, impl_type, cutoff_rating,
                                      max_core_nodes, store_dataset,
                                      use_artists=False)
    elif dataset_domain == "lastfm_simple":
        data = LastfmDataPreparserSimple(
            dataset_path, impl_type, cutoff_rating,
            max_core_nodes, store_dataset, use_artists=False,
            interact_type_val=interact_type_val,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "lastfm_lovelisten":
        data = LastfmDataPreparserLovelisten(
            dataset_path, impl_type, cutoff_rating,
            max_core_nodes, store_dataset, use_artists=False,
            interact_type_val=interact_type_val,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "goodreads":
        data = GoodreadsDataPreparser(
            dataset_path, impl_type, cutoff_rating,
            max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "flixster":
        data = FlixsterDataPreparser(
            dataset_path, impl_type, cutoff_rating,
            max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    elif dataset_domain == "flickr":
        data = FlickrDataPreparser(
            dataset_path, impl_type, cutoff_rating,
            max_core_nodes, store_dataset,
            min_interactions_per_user=min_interacts_beforeaftersplit_per_user * 2)
    else:
        # Fail with a clear message instead of an AttributeError on None
        # further down.
        raise ValueError("Unknown dataset domain: %r" % (dataset_domain,))

    # Errors from loading or analysis are intentionally left to propagate.
    data.get_all_data()
    BasicNetworkAnalyzer(data).show_basic_stats()
    return data
        node = self.netdata.nodes[node_id] 
        node.get_details(self.netdata.interaction_types[0])
        return

    """
    def getItemPopularityInDataset(data):
        likes = {}
        for k, v in data.allusers.iteritems():
            for itemid, created in v.likes:
                if itemid not in likes:
                    likes[itemid] = 0
                likes[itemid] += 1

        likes_hist={}
        in_sum = 0
        for val in likes.values():
            if val<100:
                in_sum += 1
                if val not in likes_hist:
                    likes_hist[val] = 0
                likes_hist[val] += 1
        items_covered_ratio = in_sum /float(len(likes))
        return list(likes_hist.iteritems()), items_covered_ratio, likes.values()
    """

if __name__ == "__main__":
    # Ad-hoc run: build a HashtagDataPreparser on a hard-coded local path,
    # load everything and show the basic statistics.
    data = HashtagDataPreparser("/home/asharma/datasets/ttest/")
    data.get_all_data()
    BasicNetworkAnalyzer(data).show_basic_stats()
# Example #3
# 0
def instantiate_networkdata_class(dataset_domain, dataset_path, impl_type,
                                  max_core_nodes, cutoff_rating, store_dataset,
                                  interact_type_val,
                                  min_interacts_beforeaftersplit_per_user):
    """Pick the preparser class for ``dataset_domain``, load its data, show stats.

    Args:
        dataset_domain: One of "twitter", "lastfm", "lastfm_simple",
            "lastfm_lovelisten", "goodreads", "flixster" or "flickr".
        dataset_path: Passed to every preparser.
        impl_type: Passed to every preparser.
        max_core_nodes: Passed to every preparser except the "twitter" one.
        cutoff_rating: Passed to every preparser except the "twitter" one.
        store_dataset: Passed to every preparser except the "twitter" one.
        interact_type_val: Used only by the "lastfm_simple" and
            "lastfm_lovelisten" preparsers.
        min_interacts_beforeaftersplit_per_user: Multiplied by two and passed
            as ``min_interactions_per_user``; unused for "twitter" and
            "lastfm".

    Returns:
        The constructed preparser, once ``get_all_data()`` has been called on
        it and ``BasicNetworkAnalyzer`` has shown its basic stats.

    Raises:
        ValueError: If ``dataset_domain`` is not a supported name.
        Exceptions raised during loading or analysis propagate unchanged.
    """
    # Positional arguments common to every preparser except the "twitter" one.
    # Note the order: cutoff_rating comes before max_core_nodes here.
    shared_args = (dataset_path, impl_type, cutoff_rating, max_core_nodes,
                   store_dataset)

    # The doubling below stays inside each branch so it is never evaluated for
    # domains that do not use the per-user minimum.
    if dataset_domain == "twitter":
        data = HashtagDataPreparser(dataset_path, impl_type)
    elif dataset_domain == "lastfm":
        data = LastfmDataPreparserCSV(*shared_args, use_artists=False)
    elif dataset_domain == "lastfm_simple":
        data = LastfmDataPreparserSimple(
            *shared_args,
            use_artists=False,
            interact_type_val=interact_type_val,
            min_interactions_per_user=(
                min_interacts_beforeaftersplit_per_user * 2))
    elif dataset_domain == "lastfm_lovelisten":
        data = LastfmDataPreparserLovelisten(
            *shared_args,
            use_artists=False,
            interact_type_val=interact_type_val,
            min_interactions_per_user=(
                min_interacts_beforeaftersplit_per_user * 2))
    elif dataset_domain == "goodreads":
        data = GoodreadsDataPreparser(
            *shared_args,
            min_interactions_per_user=(
                min_interacts_beforeaftersplit_per_user * 2))
    elif dataset_domain == "flixster":
        data = FlixsterDataPreparser(
            *shared_args,
            min_interactions_per_user=(
                min_interacts_beforeaftersplit_per_user * 2))
    elif dataset_domain == "flickr":
        data = FlickrDataPreparser(
            *shared_args,
            min_interactions_per_user=(
                min_interacts_beforeaftersplit_per_user * 2))
    else:
        # Previously this fell through with data = None and crashed later
        # with an AttributeError; report the real problem instead.
        raise ValueError("Unknown dataset domain: %r" % (dataset_domain,))

    # No try/except here: failures are meant to reach the caller as-is.
    data.get_all_data()
    BasicNetworkAnalyzer(data).show_basic_stats()
    return data