示例#1
0
def run_rt_processing():
    """Interactively run raw tweet processing.

    Prompts for the raw tweet processing config path, builds the getter and
    setter DAOs from that config, then asks which kind of tweets to process
    and calls the matching ``RawTweetProcessor`` method.

    Raises:
        Exception: If the chosen process type is neither 1 nor 2.
    """
    click.echo(
        "Provide the full path the the raw tweet processing config(leave blank to set to default)"
    )
    fallback_config = (get_project_root() / 'src' / 'process' /
                       'raw_tweet_processing' / 'rt_processing_config.yaml')
    config_parser = RawTweetProcessingConfigParser(
        click.prompt("Path", fallback_config))
    getter = config_parser.create_getter_DAOs()
    raw_setter, processed_setter = config_parser.create_setter_DAOs()
    processor = RawTweetProcessor()

    for menu_line in ("Process Types", "1. Global(random) Tweets",
                      "2. User Tweets"):
        click.echo(menu_line)
    choice = click.prompt("Choose what to process", type=int)

    # Anything outside the menu is rejected before any processing starts.
    if choice not in (1, 2):
        raise Exception("Invalid input")

    if choice == 1:
        click.echo("Processing global tweets")
        processor.gen_processed_global_tweets(getter, raw_setter,
                                              processed_setter)
        return

    click.echo("Processing user tweets")
    processor.gen_processed_user_tweets(getter, raw_setter, processed_setter)
示例#2
0
def run_wf():
    """Interactively compute a word count or word frequency vector.

    Prompts for the word frequency config path, builds the getter and setter
    DAOs from that config, then asks which vector to compute and calls the
    matching ``WordFrequency`` method.

    Raises:
        Exception: If the chosen vector type is not one of the listed options.
    """
    default_path = get_project_root(
    ) / 'src' / 'process' / 'word_frequency' / 'wf_config.yaml'
    wf_config_path = click.prompt("Path", default_path)
    wf_config_parser = WordFrequencyConfigParser(wf_config_path)
    processed_tweet_getter, wf_getter = wf_config_parser.create_getter_DAOs()
    processed_tweet_setter, wf_setter = wf_config_parser.create_setter_DAOs()
    word_freq = WordFrequency()

    click.echo("Word Vector Types")
    click.echo("1. Global Word Count Vector")
    click.echo("2. Global Word Frequency Vector")
    click.echo("3. User Word Count Vector")
    click.echo("4. User Word Frequency Vector")
    click.echo("5. Relative User Word Frequency Vector")
    wf_type = click.prompt("Choose what to compute", type=int)

    if wf_type == 1:
        click.echo("Computing global word count vector")
        word_freq.gen_global_word_count_vector(processed_tweet_getter,
                                               processed_tweet_setter,
                                               wf_setter)
    elif wf_type == 2:
        click.echo("Computing global word frequency vector")
        word_freq.gen_global_word_frequency_vector(wf_getter, wf_setter)
    elif wf_type == 3:
        click.echo("Computing user word count vector")
        word_freq.gen_user_word_count_vector(processed_tweet_getter,
                                             processed_tweet_setter, wf_setter)
    elif wf_type == 4:
        click.echo("Computing user word frequency vector")
        word_freq.gen_user_word_frequency_vector(wf_getter, wf_setter)
    elif wf_type == 5:
        click.echo("Computing relative user word frequency vector")
        word_freq.gen_relative_user_word_frequency_vector(wf_getter, wf_setter)
    else:
        # Previously an unknown choice fell through silently; fail loudly the
        # same way the other interactive runners do.
        raise Exception("Invalid input")
示例#3
0
def run_social_graph():
    """Interactively build a social graph.

    Prompts for the social graph config path, builds the getter and setter
    DAOs from it, and generates the user friends graph either for a single
    user or for every user of a user list.

    Raises:
        Exception: If the chosen option is not 1.
    """
    config_parser = SocialGraphConfigParser(
        click.prompt(
            "Path",
            get_project_root() / 'src' / 'process' / 'social_graph' /
            'social_graph_config.yaml'))
    friends_getter = config_parser.create_getter_DAOs()
    graph_setter = config_parser.create_setter_DAOs()
    graph_builder = SocialGraph()

    click.echo("Social Graph options")
    click.echo("1. User Friends Graph")
    if click.prompt("Choose what to compute", type=int) != 1:
        raise Exception("Invalid input")

    click.echo("Computing user friends graph")
    click.echo(
        "Reminder: make sure to have downloaded the local neighborhood for your user of interest"
    )
    use_user_list, user_or_user_list, ulp = get_user()
    if not use_user_list:
        # Single user: call the generator directly.
        graph_builder.gen_user_friends_graph(user_or_user_list,
                                             friends_getter, graph_setter)
        return

    # User list: let the list processor drive the generator.
    ulp.run_function_by_user_list(graph_builder.gen_user_friends_graph,
                                  user_or_user_list, friends_getter,
                                  graph_setter)
示例#4
0
 def main():
     """Run the random tweet download with the default download config.

     Only the first getter and the first setter produced by the config
     parser are used; the remaining DAOs are discarded.
     """
     downloader = TwitterTweetDownloader()
     parser = DownloadConfigParser(
         get_project_root() / 'src' / 'process' / 'download' /
         'download_config.yaml')
     getter, _unused_getter = parser.create_getter_DAOs()
     setter, _unused_a, _unused_b = parser.create_setter_DAOs()
     downloader.gen_random_tweet(getter, setter)
示例#5
0
 def global_word_count():
     """Generate the global word count vector with the default config.

     Builds the DAOs from the default word frequency config and passes the
     processed tweet getter/setter and the word frequency setter to
     ``WordFrequency.gen_global_word_count_vector``.
     """
     counter = WordFrequency()
     parser = WordFrequencyConfigParser(
         get_project_root() / 'src' / 'process' / 'word_frequency' /
         'wf_config.yaml')
     tweet_getter, _unused_getter = parser.create_getter_DAOs()
     tweet_setter, frequency_setter = parser.create_setter_DAOs()
     counter.gen_global_word_count_vector(tweet_getter, tweet_setter,
                                          frequency_setter)
示例#6
0
def get_user():
    """Ask for either a single user or a user list.

    Returns:
        A ``(use_user_list, user_or_user_list, ulp)`` tuple: the answer to
        the confirmation prompt, the parsed user list or the entered user,
        and the ``UserListProcessor`` that parsed the list (``None`` when a
        single user was entered).
    """
    wants_list = click.confirm("Do you wish to provide a user list?")
    if not wants_list:
        return wants_list, click.prompt("User"), None

    list_path = click.prompt(
        "User List Path",
        get_project_root() / 'src' / 'tools' / 'user_list')
    list_processor = UserListProcessor()
    parsed_users = list_processor.user_list_parser(list_path)
    return wants_list, parsed_users, list_processor
示例#7
0
import argparse
import time
from src.dependencies.injector import Injector
from src.shared.utils import get_project_root
from src.shared.logger_factory import LoggerFactory
from typing import List

# Module-level logger for this script, created through the project's LoggerFactory.
log = LoggerFactory.logger(__name__)

# Config file used by detect_community() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/detect_community_config.yaml"


def detect_community(name_list: List, path=DEFAULT_PATH):
    """Run community detection for the given screen names.

    Builds an injector from the config file, obtains the community detector
    from its process module and calls ``detect_community_by_screen_name``.

    Args:
        name_list: Screen names passed to the community detector.
        path: Config file used to build the injector.

    Raises:
        SystemExit: With status 1 if any step raises; the exception is
            logged first.
    """
    try:
        injector = Injector.get_injector_from_file(path)
        process_module = injector.get_process_module()

        community_detector = process_module.get_community_detector()
        community_detector.detect_community_by_screen_name(name_list)
    except Exception as e:
        log.exception(e)
        # The bare ``exit()`` helper comes from the ``site`` module (it is
        # not guaranteed to exist) and, called without a status, ends the
        # process with code 0. Raise SystemExit directly with a non-zero
        # status so callers and shells can see that the run failed.
        raise SystemExit(1) from e


if __name__ == "__main__":
    # NOTE(review): the string below says "download tweets", but the parser
    # description says this script detects communities from seed sets —
    # the string looks copied from another script; confirm and update.
    """
    Short script to download tweets
    """
    # Command-line parser for the community detection script.
    parser = argparse.ArgumentParser(
        description='Detect community from seed sets')
示例#8
0
import argparse
from src.shared.utils import get_project_root
from src.scripts.parser.parse_config import parse_from_file
from src.activity.cluster_social_graph_activity import ClusterSocialGraphActivity

# Config file used by cluster_social_graph() and the ``--path`` option when no path is given.
DEFAULT_PATH = str(get_project_root()) + "/src/scripts/config/cluster_social_graph_config.yaml"


def cluster_social_graph(seed_id: str, params=None, path=DEFAULT_PATH):
    """Cluster the social graph identified by ``seed_id``.

    Args:
        seed_id: Seed id forwarded to the clustering activity.
        params: Optional parameters forwarded unchanged to the activity.
        path: Config file parsed to configure the activity.
    """
    clustering_activity = ClusterSocialGraphActivity(parse_from_file(path))
    clustering_activity.cluster_social_graph(seed_id, params)


if __name__ == "__main__":
    """
    Short script to perform clustering on a social graph
    """
    # The description previously read "Downloads the given number of tweets",
    # copied from the tweet download script; it now describes this script.
    parser = argparse.ArgumentParser(
        description='Clusters the social graph of the given seed id')
    parser.add_argument('-s', '--seed_id', dest='seed_id', required=True,
        help='The seed id of the local neighbourhood to convert into a social graph', type=str)
    parser.add_argument('-p', '--path', dest='path', required=False,
        default=DEFAULT_PATH, help='The path of the config file', type=str)

    args = parser.parse_args()

    cluster_social_graph(args.seed_id, path=args.path)
示例#9
0
from src.activity.download_user_tweets_activity import DownloadUserTweetsActivity
import argparse
import time
from src.dependencies.injector import Injector
from src.shared.utils import get_project_root
from src.shared.logger_factory import LoggerFactory
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from src.model.local_neighbourhood import LocalNeighbourhood
import json

# Module-level logger for this script, created through the project's LoggerFactory.
log = LoggerFactory.logger(__name__)

# Config file used by produce_plots() when the caller does not pass ``path``.
DEFAULT_PATH = str(get_project_root()) + "/src/scripts/config/default_config.yaml"

def produce_plots(seed_id: str, user_name: str, path=DEFAULT_PATH):
    threshold = 60

    injector = Injector.get_injector_from_file(path)
    process_module = injector.get_process_module()
    dao_module = injector.get_dao_module()

    user_friend_getter = dao_module.get_user_friend_getter()
    friends_cleaner = process_module.get_friends_cleaner()
    social_graph_constructor = process_module.get_social_graph_constructor()
    clusterer = process_module.get_clusterer()
    cluster_word_frequency_processor = process_module.get_cluster_word_frequency_processor()

    tweet_processor = process_module.get_tweet_processor()
示例#10
0
def test_getProjectRoot_returnsExpectedInput():
    """The project root path, rendered as a string, must end with ``core``."""
    root_as_text = str(utils.get_project_root())

    assert root_as_text.endswith("core")
示例#11
0
import argparse
from src.shared.utils import get_project_root
from src.dependencies.injector import Injector

# Config file used by process_user_tweets() when the caller does not pass ``path``.
DEFAULT_PATH = str(get_project_root(
)) + "/src/scripts/config/process_local_neighbourhood_tweets_config.yaml"


def process_user_tweets(id: str, path=DEFAULT_PATH):
    """Process the tweets of one user.

    Args:
        id: User id passed to ``process_tweets_by_user_id``.
        path: Config file used to build the injector.
    """
    processes = Injector.get_injector_from_file(path).get_process_module()
    processes.get_tweet_processor().process_tweets_by_user_id(id)


if __name__ == "__main__":
    """
    Short script to process the tweets of a Local Neighbourhood
    """
    parser = argparse.ArgumentParser(
        description='Processes the tweets of the given user')
    parser.add_argument('-i',
                        '--id',
                        dest='id',
                        required=True,
                        help='The id of the user',
                        type=str)
    parser.add_argument('-p',
                        '--path',
示例#12
0
from src.activity.download_local_neighbourhood_activity import DownloadLocalNeighbourhoodActivity
import argparse
import time
from src.scripts.parser.parse_config import parse_from_file
from src.shared.utils import get_project_root

# Config file used by download_local_neighbourhood() when the caller does not pass ``path``.
DEFAULT_PATH = str(get_project_root(
)) + "/src/scripts/config/download_local_neighbourhood_config.yaml"


def download_local_neighbourhood(name: str, path=DEFAULT_PATH):
    """Download the local neighbourhood of the user with the given screen name.

    Args:
        name: Screen name forwarded to the download activity.
        path: Config file parsed to configure the activity.
    """
    download_activity = DownloadLocalNeighbourhoodActivity(
        parse_from_file(path))
    download_activity.download_local_neighbourhood_by_screen_name(name)


if __name__ == "__main__":
    """
    Short script to download tweets
    """
    parser = argparse.ArgumentParser(
        description='Downloads the local neighbourhood of the given user')
    parser.add_argument('-n',
                        '--name',
                        dest='name',
                        required=True,
                        help='The name of the user to start on',
                        type=str)
    parser.add_argument('-p',
                        '--path',
示例#13
0
import argparse
from src.shared.utils import get_project_root
from src.dependencies.injector import Injector
from src.shared.logger_factory import LoggerFactory

# Module-level logger for this script, created through the project's LoggerFactory.
log = LoggerFactory.logger(__name__)

# Config file used by download_user() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/download_user_config.yaml"


def download_user(name: str, path=DEFAULT_PATH):
    """Download the user with the given screen name, logging start and end.

    Args:
        name: Screen name passed to ``download_user_by_screen_name``.
        path: Config file used to build the injector.
    """
    processes = Injector.get_injector_from_file(path).get_process_module()
    downloader = processes.get_user_downloader()

    start_message = "Starting Download user with name: %s" % (name)
    log.info(start_message)
    downloader.download_user_by_screen_name(name)
    done_message = "Done downloading user: %s" % (name)
    log.info(done_message)


if __name__ == "__main__":
    """
    Short script to download users
    """
    parser = argparse.ArgumentParser(description='Downloads the given user')
    parser.add_argument('-n',
                        '--name',
                        dest='name',
                        help="The screen name of the user to download",
示例#14
0
from src.data_cleaning.Activity.clean_friends_activity import FriendsCleaningActivity
import argparse
import time
from src.data_cleaning.parser.parse_config import parse_from_file
from src.shared.utils import get_project_root

# Config file used by friends_cleaning() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/data_cleaning/config/friends_cleaning.yaml"


def friends_cleaning(threshold: int,
                     method: str,
                     base_user: str,
                     path=DEFAULT_PATH):
    """Clean the friends of ``base_user`` with the selected method.

    Args:
        threshold: Threshold forwarded to the cleaning method.
        method: ``"1"`` selects ``clean_by_friends``, ``"2"`` selects
            ``clean_by_tweets``.
        base_user: User whose friends are cleaned.
        path: Config file parsed to configure the activity.

    Raises:
        NotImplementedError: If ``method`` is neither ``"1"`` nor ``"2"``.
    """
    cleaning_activity = FriendsCleaningActivity(parse_from_file(path))

    # The activity is built first, so config errors surface before the
    # method check, exactly as in the branch-per-method form.
    if method not in ("1", "2"):
        raise NotImplementedError()

    if method == "1":
        cleaning_activity.clean_by_friends(base_user, threshold)
        return
    cleaning_activity.clean_by_tweets(base_user, threshold)


if __name__ == "__main__":
    """
    Clean inactive users in the friends list.
    """
    parser = argparse.ArgumentParser(description='Get rid of inactive users')
    parser.add_argument('-t',
示例#15
0
def _run_for_users(func, use_user_list, user_or_user_list, ulp, *args):
    """Call ``func`` for one user, or fan it out over a user list via ``ulp``."""
    if use_user_list:
        ulp.run_function_by_user_list(func, user_or_user_list, *args)
    else:
        func(user_or_user_list, *args)


def run_download():
    """Interactively download tweets, friends or followers.

    Prompts for the download config path, builds the getter and setter DAOs
    from it, then walks the user through the download menus and runs the
    selected downloader for a single user or a user list.

    Raises:
        Exception: If any menu choice is not one of the listed options.
    """
    click.echo(
        "Provide the full path the the download config(leave blank to set to default)"
    )
    default_path = get_project_root(
    ) / 'src' / 'process' / 'download' / 'download_config.yaml'
    download_config_path = click.prompt("Path", default_path)
    download_config_parser = DownloadConfigParser(download_config_path)
    tweepy_getter, user_friends_getter = download_config_parser.create_getter_DAOs(
    )
    tweet_mongo_setter, user_friends_setter, user_followers_setter = download_config_parser.create_setter_DAOs(
    )

    click.echo("Download Types:")
    click.echo("1. Twitter Tweet Download")
    click.echo("2. Twitter Friends Download")
    click.echo("3. Twitter Followers Download")
    download_type = click.prompt("Choose a download type", type=int)

    if download_type == 1:
        tweet_downloader = TwitterTweetDownloader()
        click.echo("Tweet types:")
        click.echo("1. User Tweets")
        click.echo("2. Random Tweets")
        tweet_type = click.prompt("Choose what to download", type=int)
        if tweet_type == 1:
            # TODO: follow by this example
            click.echo("Downloading User Tweets")
            use_user_list, user_or_user_list, ulp = get_user()
            # NOTE(review): click.prompt re-prompts on empty input when no
            # default is given, so "leave blank" likely cannot be honoured
            # by the int prompts in this function — confirm the intended
            # "get all" sentinel before changing them.
            num_tweets = click.prompt(
                "Number of Tweets(leave blank to get all)", type=int)
            download_args = [tweepy_getter, tweet_mongo_setter, num_tweets]
            if click.confirm("Do you want to specify a start and end date?"):
                # The date range is only passed when the user asked for one.
                download_args.append(
                    get_date(click.prompt("Start Date(YYYY-MM-DD)")))
                download_args.append(
                    get_date(click.prompt("End Date(YYYY-MM-DD)")))
            _run_for_users(tweet_downloader.gen_user_tweets, use_user_list,
                           user_or_user_list, ulp, *download_args)
        elif tweet_type == 2:
            click.echo("Downloading Random Tweets")
            click.echo(
                "Due to Tweepy constraints, if you want to download multiple tweets, you should launch a daemon"
            )
            if click.confirm(
                    "Do you wish to launch a daemon to download random tweets?"
            ):
                click.echo("Launching daemon")
                download_daemon.download_random_tweet()
            else:
                tweet_downloader.gen_random_tweet(tweepy_getter,
                                                  tweet_mongo_setter)
        else:
            # Previously an unknown tweet type fell through silently, unlike
            # the friend type and download type menus.
            raise Exception("Invalid input")
    elif download_type == 2:
        click.echo("Friend Download Types")
        click.echo("1. User Friends")
        click.echo("2. User Local Neighborhood")
        friend_type = click.prompt("Choose which to download", type=int)

        friends_downloader = TwitterFriendsDownloader()
        if friend_type == 1:
            click.echo("Downloading user friends")
            use_user_list, user_or_user_list, ulp = get_user()
            num_friends = click.prompt(
                "Number of Friends(leave blank to get all)", type=int)
            _run_for_users(friends_downloader.gen_friends_by_screen_name,
                           use_user_list, user_or_user_list, ulp,
                           tweepy_getter, user_friends_setter, num_friends)
        elif friend_type == 2:
            click.echo("Downloading user local neightborhood")
            use_user_list, user_or_user_list, ulp = get_user()
            _run_for_users(friends_downloader.gen_user_local_neighborhood,
                           use_user_list, user_or_user_list, ulp,
                           tweepy_getter, user_friends_getter,
                           user_friends_setter)
        else:
            raise Exception("Invalid input")
    elif download_type == 3:
        click.echo("Downloading followers")
        use_user_list, user_or_user_list, ulp = get_user()
        num_followers = click.prompt(
            "Number of Followers(leave blank to get all)", type=int)
        followers_downloader = TwitterFollowersDownloader()
        _run_for_users(followers_downloader.gen_followers_by_screen_name,
                       use_user_list, user_or_user_list, ulp, tweepy_getter,
                       user_followers_setter, num_followers)
    else:
        raise Exception("Invalid input")
示例#16
0
from src.activity.download_raw_tweets_activity import DownloadTweetsActivity
import argparse
import time
from src.scripts.parser.parse_config import parse_from_file
from src.shared.utils import get_project_root

# Config file used by download_tweets() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/download_tweets_config.yaml"


def download_tweets(num: int, path=DEFAULT_PATH):
    """Stream ``num`` random tweets through the download activity.

    Args:
        num: Passed to the activity as ``num_tweets``.
        path: Config file parsed to configure the activity.
    """
    streaming_activity = DownloadTweetsActivity(parse_from_file(path))
    streaming_activity.stream_random_tweets(num_tweets=num)


if __name__ == "__main__":
    """
    Short script to download tweets
    """
    parser = argparse.ArgumentParser(
        description='Downloads the given number of tweets')
    parser.add_argument('-n',
                        '--num',
                        dest='num',
                        required=True,
                        help='The number of tweets to download',
                        type=int)
    parser.add_argument('-p',
                        '--path',
示例#17
0
from src.algorithm.Activity.top_words_activity import TopWordsActivity
import argparse
import time
from src.algorithm.top_words.parser.parse_config import parse_from_file
from src.shared.utils import get_project_root

# Config file used by top_words() and the ``--path`` option when no path is given.
DEFAULT_PATH = str(get_project_root()) + "/src/algorithm/top_words/config/top_words.yaml"
# Clustering type used by the command-line parser when none is given.
DEFAULT_METHOD = "Label Propagation"

def top_words(cluster_num: int, base_user: str, cluster_type: str, path=DEFAULT_PATH):
    """Find the top words of one cluster of ``base_user``.

    Args:
        cluster_num: Which cluster to inspect.
        base_user: The target user.
        cluster_type: Clustering type forwarded to the activity.
        path: Config file parsed to configure the activity.
    """
    words_activity = TopWordsActivity(parse_from_file(path))
    words_activity.find_top_words(cluster_num, base_user, cluster_type)

if __name__ == "__main__":
    """
    Short script to find the top words of a cluster.
    """
    # The description previously read "Get rid of inactive users", copied
    # from the friends cleaning script.
    parser = argparse.ArgumentParser(description='Find the top words of a cluster')
    # An option string containing a space cannot be typed as a normal flag on
    # the command line, so a dashed spelling is added; the legacy spelling is
    # kept as an alias so existing (quoted) invocations keep working.
    parser.add_argument('-c', '--cluster-number', '--cluster number', dest='cluster_num', required=True,
        help='which cluster to find top words', type=int)
    parser.add_argument('-u', '--user', dest='base_user', required=True,
        help='The target user', type=str)
    parser.add_argument('-t', '--clustering-type', '--clustering type', dest='cluster_type', required=False,
        default=DEFAULT_METHOD, help='Label Propagation / Max Modularity', type=str)
    parser.add_argument('-p', '--path', dest='path', required=False,
        default=DEFAULT_PATH, help='The path of the config file', type=str)

    args = parser.parse_args()
示例#18
0
import argparse
from src.shared.utils import get_project_root
from src.dependencies.injector import Injector
from typing import List

# Config file used by get_cluster_word_frequency() and the ``--path`` option when no path is given.
DEFAULT_PATH = str(get_project_root()) + "/src/scripts/config/cluster_word_frequency_config.yaml"

def get_cluster_word_frequency(ids: List[str], path=DEFAULT_PATH):
    """Compute the cluster word frequency vectors for the given user ids.

    Runs the cluster word frequency vector step first and the relative
    cluster word frequency step second, both on the same ``ids``.

    Args:
        ids: User ids forming the cluster.
        path: Config file used to build the injector.
    """
    processes = Injector.get_injector_from_file(path).get_process_module()
    frequency_processor = processes.get_cluster_word_frequency_processor()

    for step in (frequency_processor.process_cluster_word_frequency_vector,
                 frequency_processor.process_relative_cluster_word_frequency):
        step(ids)

if __name__ == "__main__":
    """
    Short script to compute the word frequency of a cluster of users
    """
    # The description previously read "Downloads the given user", copied from
    # the user download script.
    parser = argparse.ArgumentParser(
        description='Computes the cluster word frequency of the given users')
    parser.add_argument('-u', '--users', dest='users',
        help="The ids of the users to get cluster word frequency", required=True)
    parser.add_argument('-p', '--path', dest='path', required=False,
        default=DEFAULT_PATH, help='The path of the config file', type=str)

    args = parser.parse_args()

    # Accept "1,2", "1, 2" and trailing commas alike: strip each id and drop
    # empty entries instead of passing them on as user ids.
    users = [user.strip() for user in args.users.split(",") if user.strip()]
    if not users:
        parser.error("--users must contain at least one user id")

    get_cluster_word_frequency(users, args.path)
示例#19
0
import argparse
from src.shared.utils import get_project_root
from src.scripts.parser.parse_config import parse_from_file
from src.activity.process_tweet_activity import ProcessTweetActivity

# Config file used by process_tweet() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/process_tweet_config.yaml"


def process_tweet(id: str, path=DEFAULT_PATH):
    """Process the tweet with the given id.

    Args:
        id: Tweet id passed to ``process_tweet_by_id``.
        path: Config file parsed to configure the activity.
    """
    tweet_activity = ProcessTweetActivity(parse_from_file(path))
    tweet_activity.process_tweet_by_id(id)


if __name__ == "__main__":
    """
    Short script to process tweets
    """
    parser = argparse.ArgumentParser(description='Processes the given tweet')
    parser.add_argument('-i',
                        '--id',
                        dest='id',
                        help="The id of the tweet to process",
                        required=True,
                        type=str)
    parser.add_argument('-p',
                        '--path',
                        dest='path',
                        required=False,
示例#20
0
def _run_affinity_propagation():
    """Prompt for the config and compute Affinity Propagation clusters."""
    click.echo("Computing Affinity Propagation cluster")
    algorithm = AffinityPropagation()
    config_file = click.prompt(
        "Path",
        get_project_root() / 'src' / 'process' / 'clustering' /
        'affinity_propagation' / 'ap_config.yaml')
    config_parser = AffinityPropagationConfigParser(config_file)
    frequency_getter = config_parser.create_getter_DAOs()
    cluster_setter = config_parser.create_setter_DAOs()
    algorithm.gen_clusters(frequency_getter, cluster_setter)


def _run_label_propagation():
    """Prompt for the config and a user, then compute Label Propagation clusters."""
    click.echo("Computing Label Propagation cluster")
    config_file = click.prompt(
        "Path",
        get_project_root() / 'src' / 'process' / 'clustering' /
        'label_propagation' / 'lp_config.yaml')
    config_parser = LabelPropagationConfigParser(config_file)
    graph_getter = config_parser.create_getter_DAOs()
    cluster_setter = config_parser.create_setter_DAOs()
    target_user = click.prompt("User")
    LabelPropagation().gen_clusters(target_user, graph_getter, cluster_setter)


def _run_muisi():
    """Prompt for the MUISI variant and its arguments, then compute clusters."""
    for menu_line in ("MUISI Variants", "1. Tweets", "2. Retweets"):
        click.echo(menu_line)
    variant = click.prompt("Choose a variant", type=int)

    if variant not in (1, 2):
        raise Exception("Invalid input")

    if variant == 1:
        click.echo("Computing MUISI cluster")
        config_file = click.prompt(
            "Path",
            get_project_root() / 'src' / 'process' / 'clustering' / 'muisi' /
            'standard' / 'muisi_config.yaml')
        config_parser = MUISIConfigParser(config_file, False)
        data_getter = config_parser.create_getter_DAOs()
        cluster_setter = config_parser.create_setter_DAOs()
        algorithm = MUISI()

        # Get user args (asked in the order the config constructor takes them)
        answers = (
            click.prompt("Intersection Min", type=float),
            click.prompt("Popularity", type=float),
            click.prompt("Threshold", type=float),
            click.prompt("User Count", type=int),
            click.prompt("Item Count", type=int),
            click.prompt("Count", type=int),
            click.confirm("Do you wish to only compute based on popularity?"),
        )
        settings = MUISIConfig(*answers)
    else:
        click.echo("Computing MUISI retweets cluster")
        config_file = click.prompt(
            "Path",
            get_project_root() / 'src' / 'process' / 'clustering' / 'muisi' /
            'retweets' / 'muisi_retweets_config.yaml')
        config_parser = MUISIConfigParser(config_file, True)
        data_getter = config_parser.create_getter_DAOs()
        cluster_setter = config_parser.create_setter_DAOs()
        algorithm = MUISIRetweet()

        # Get user args (asked in the order the config constructor takes them)
        answers = (
            click.prompt("Intersection Min", type=float),
            click.prompt("Popularity", type=float),
            click.prompt("User Count", type=int),
        )
        settings = MUISIRetweetConfig(*answers)

    algorithm.gen_clusters(settings, data_getter, cluster_setter)


def run_clustering():
    """Interactively run one of the clustering algorithms.

    Shows the algorithm menu, reads the choice and hands over to the
    matching routine, which prompts for its own config path and arguments.

    Raises:
        Exception: If the algorithm choice or the MUISI variant choice is
            not one of the listed options.
    """
    for menu_line in ("Clustering Algorithms", "1. Affinity Propagation",
                      "2. Label Propagation", "3. MUISI"):
        click.echo(menu_line)
    choice = click.prompt("Choose a clustering algorithm", type=int)

    if choice == 1:
        _run_affinity_propagation()
        return
    if choice == 2:
        _run_label_propagation()
        return
    if choice == 3:
        _run_muisi()
        return
    raise Exception("Invalid input")
示例#21
0
import argparse
from src.shared.utils import get_project_root
from src.scripts.parser.parse_config import parse_from_file
from src.activity.construct_social_graph_activity import ConstructSocialGraphActivity

# Config file used by construct_social_graph() when the caller does not pass ``path``.
DEFAULT_PATH = str(get_project_root()
                   ) + "/src/scripts/config/construct_social_graph_config.yaml"


def construct_social_graph(seed_id: str, params=None, path=DEFAULT_PATH):
    """Construct the social graph for ``seed_id``.

    Args:
        seed_id: Seed id forwarded to the construction activity.
        params: Optional parameters forwarded unchanged to the activity.
        path: Config file parsed to configure the activity.
    """
    construction_activity = ConstructSocialGraphActivity(
        parse_from_file(path))
    construction_activity.construct_social_graph(seed_id, params)


if __name__ == "__main__":
    """
    Short script to convert a local neighbourhood to a social graph
    """
    parser = argparse.ArgumentParser(
        description='Downloads the given number of tweets')
    parser.add_argument(
        '-s',
        '--seed_id',
        dest='seed_id',
        required=True,
        help=
        'The seed id of the local neighbourhood to convert into a social graph',
        type=str)
    parser.add_argument('-p',
示例#22
0
import argparse
from src.shared.utils import get_project_root
from src.scripts.parser.parse_config import parse_from_file
from src.dependencies.injector import Injector

# Config file used by rank_cluster() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/detect_core_config0.yaml"


def rank_cluster(seed_id: str, params=None, path=DEFAULT_PATH):
    """Rank every cluster stored for ``seed_id`` with the 'Consumption' ranker.

    Args:
        seed_id: Seed id whose clusters are fetched and ranked.
        params: Accepted but not used by this function.
        path: Config file used to build the injector.
    """
    injector = Injector.get_injector_from_file(path)
    processes = injector.get_process_module()
    cluster_source = injector.get_dao_module().get_cluster_getter()
    consumption_ranker = processes.get_ranker('Consumption')

    # get_clusters returns a pair; only its first element is ranked here.
    found_clusters, _ = cluster_source.get_clusters(seed_id)

    for member_cluster in found_clusters:
        consumption_ranker.rank(seed_id, member_cluster)


if __name__ == "__main__":
    """
    Short script to perform clustering on a social graph
    """
    parser = argparse.ArgumentParser(
        description='Downloads the given number of tweets')
    parser.add_argument(
        '-s',
示例#23
0
from src.algorithm.Activity.top_users_activity import TopUsersActivity
import argparse
import time
from src.algorithm.top_users.parser.parse_config import parse_from_file
from src.shared.utils import get_project_root

# Config file used by top_users() and the ``--path`` option when no path is given.
DEFAULT_PATH = str(get_project_root()) + "/src/algorithm/top_users/config/rank_users.yaml"
# Clustering type used by the command-line parser when none is given.
DEFAULT_METHOD = "Label Propagation"

def top_users(cluster_num: int, base_user: str, cluster_type: str, path=DEFAULT_PATH):
    """Find the top users of one cluster of ``base_user``.

    Args:
        cluster_num: Which cluster to inspect.
        base_user: The target user.
        cluster_type: Clustering type forwarded to the activity.
        path: Config file parsed to configure the activity.
    """
    users_activity = TopUsersActivity(parse_from_file(path))
    users_activity.find_top_users(cluster_num, base_user, cluster_type)

if __name__ == "__main__":
    """
    Short script to find the top users of a cluster.
    """
    # The description and the cluster help text were copied from other
    # scripts ("Get rid of inactive users", "top words"); they now describe
    # this script.
    parser = argparse.ArgumentParser(description='Find the top users of a cluster')
    # An option string containing a space cannot be typed as a normal flag on
    # the command line, so a dashed spelling is added; the legacy spelling is
    # kept as an alias so existing (quoted) invocations keep working.
    parser.add_argument('-c', '--cluster-number', '--cluster number', dest='cluster_num', required=True,
        help='which cluster to find top users', type=int)
    parser.add_argument('-u', '--user', dest='base_user', required=True,
        help='The target user', type=str)
    parser.add_argument('-t', '--clustering-type', '--clustering type', dest='cluster_type', required=False,
        default=DEFAULT_METHOD, help='Label Propagation / Max Modularity', type=str)
    parser.add_argument('-p', '--path', dest='path', required=False,
        default=DEFAULT_PATH, help='The path of the config file', type=str)

    args = parser.parse_args()
示例#24
0
from src.dependencies.injector import Injector
from src.shared.utils import get_project_root
from src.shared.logger_factory import LoggerFactory
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from src.model.local_neighbourhood import LocalNeighbourhood
import json
import logging
import random
import gc

# Module-level logger created through the project's LoggerFactory.
# NOTE(review): logging.ERROR is presumably the logger level — confirm against LoggerFactory.logger.
log = LoggerFactory.logger(__name__, logging.ERROR)

# Config file used by produce_plots() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/default_config.yaml"


def produce_plots(user_name: str, thresh, i, path=DEFAULT_PATH):

    injector = Injector.get_injector_from_file(path)
    process_module = injector.get_process_module()
    dao_module = injector.get_dao_module()

    user_friend_getter = dao_module.get_user_friend_getter()
    friends_cleaner = process_module.get_extended_friends_cleaner()
    social_graph_constructor = process_module.get_social_graph_constructor()
    clusterer = process_module.get_clusterer()
    user_getter = dao_module.get_user_getter()

    seed_id = user_getter.get_user_by_screen_name(user_name).id
示例#25
0
import argparse
from src.shared.utils import get_project_root
from src.dependencies.injector import Injector

# Config file used by get_user_word_frequency() when the caller does not pass ``path``.
DEFAULT_PATH = str(
    get_project_root()) + "/src/scripts/config/user_word_frequency_config.yaml"


def get_user_word_frequency(id, path=DEFAULT_PATH):
    """Compute the word frequency vectors for one user.

    Runs the user word frequency vector step first and the relative user
    word frequency step second, both for the same ``id``.

    Args:
        id: User id passed to both processing steps.
        path: Config file used to build the injector.
    """
    processes = Injector.get_injector_from_file(path).get_process_module()
    frequency_processor = processes.get_user_word_frequency_processor()

    for step in (frequency_processor.process_user_word_frequency_vector,
                 frequency_processor.process_relative_user_word_frequency):
        step(id)


if __name__ == "__main__":
    """
    Short script to download users
    """
    parser = argparse.ArgumentParser(description='Downloads the given user')
    parser.add_argument('-u',
                        '--name',
                        dest='name',
                        help="The id of the user to get word frequency",
                        required=True)
    parser.add_argument('-p',
                        '--path',