def main(topology):
    # The input to main is the path to the topology file.
    # The output of this script is two JSON files saved inside the downloaded-tweets directory:
    # one holds all the active users and the other holds all the inactive users from the topology.
    # User activity is determined by status count and tweet availability (public vs. private).
    #
    # This script can be stopped and restarted mid-run without losing progress.

    inactive_users = read_json('dnld_tweets/inactive_users.json')
    active_users = read_json('dnld_tweets/active_users.json')
    twpy_api = auth.get_access_creds()
    tweets_dir = './dnld_tweets/'

    # collect every unique user from the topology file into a set
    with open(topology, 'r') as inp_file:
        comm_set = set(user for community in inp_file for user in ast.literal_eval(community))

    # create directory for storing tweets
    if not os.path.exists(os.path.dirname(tweets_dir)):
        os.makedirs(os.path.dirname(tweets_dir), 0o755)

    # download tweets for every single user in the set
    # separate active users from inactive users based on status count and availability
    bar = pyprind.ProgPercent(len(comm_set), track_time=True, title='Downloading Tweets') 
    while comm_set:
        user = comm_set.pop()
        bar.update(item_id=str(user) + '\t')

        if str(user) in inactive_users or str(user) in active_users:
            continue

        # skip user if they don't exist or are inactive
        status_count = user_status_count(user, twpy_api)
        if status_count <= 10:
            inactive_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        # skip user if their tweets have already been downloaded
        if os.path.exists(os.path.join(tweets_dir, str(user))):
            active_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        tweets = get_tweets(user, twpy_api)

        if tweets:
            tweet_filename = os.path.join(tweets_dir, str(user))
            write_tweets(tweets, tweet_filename)
            active_users[str(user)] = status_count
        else:
            inactive_users[str(user)] = 0 

        write_json(tweets_dir, active_users, inactive_users)
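
# Hedged sketch: the resume behavior above relies on read_json and write_json, which are
# defined elsewhere in the project. The versions below are one plausible shape for them,
# assuming read_json returns an empty dict on the first run (file not yet written) and
# write_json rewrites both progress files after every user; the real helpers may differ.
import json
import os


def read_json(path):
    # Return the decoded JSON contents of `path`, or an empty dict if the file
    # does not exist yet (first run).
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as fp:
        return json.load(fp)


def write_json(tweets_dir, active_users, inactive_users):
    # Persist both progress files so the script can be stopped and restarted
    # without losing work.
    with open(os.path.join(tweets_dir, 'active_users.json'), 'w') as fp:
        json.dump(active_users, fp)
    with open(os.path.join(tweets_dir, 'inactive_users.json'), 'w') as fp:
        json.dump(inactive_users, fp)
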
def main(topology):
    inactive_users = read_json('dnld_tweets/inactive_users.json')
    active_users = read_json('dnld_tweets/active_users.json')
    _, app_auths = auth.get_access_creds()
    tweets_dir = './dnld_tweets/'

    with open(topology, 'r') as inp_file:
        comm_set = set(user for community in inp_file
                       for user in ast.literal_eval(community))

    if not os.path.exists(os.path.dirname(tweets_dir)):
        os.makedirs(os.path.dirname(tweets_dir), 0o755)

    bar = pyprind.ProgPercent(len(comm_set),
                              track_time=True,
                              title='Downloading Tweets')
    while comm_set:
        user = comm_set.pop()
        bar.update(item_id=str(user) + '\t')

        if str(user) in inactive_users:
            continue

        api = auth.manage_auth_handlers(app_auths)

        # skip user if they don't exist or are inactive
        status_count = user_status_count(user, api)
        if status_count <= 10:
            inactive_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        # skip user if their tweets have already been downloaded
        if os.path.exists(os.path.join(tweets_dir, str(user))):
            active_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        tweets = get_tweets(user, api)

        if tweets:
            tweet_filename = os.path.join(tweets_dir, str(user))
            write_tweets(tweets, tweet_filename)
            active_users[str(user)] = status_count
        else:
            inactive_users[str(user)] = 0

        write_json(tweets_dir, active_users, inactive_users)
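
# Hedged sketch: this variant rotates across several authenticated handles so requests are
# spread over multiple sets of app credentials. auth.manage_auth_handlers is defined
# elsewhere; one minimal way it could work is shown below, assuming app_auths is a list of
# already-authenticated tweepy.API objects. The real helper may instead track per-handle
# rate-limit status before choosing.
import random


def manage_auth_handlers(app_auths):
    # Hand back one of the available API handles for the next request.
    return random.choice(app_auths)
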
def main():
    search_dir = 'twitter_geo_searches/'
    if not os.path.exists(os.path.dirname(search_dir)):
        os.makedirs(os.path.dirname(search_dir), 0o755)

    twpy_api = auth.get_access_creds()
    pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))

    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description=
        'Get Twitter user ids and their follower ids using Tweepy and save in different formats'
    )
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search',
        help='Gather Twitter user ids and followers by city, state and radius')
    search_parser.add_argument(
        '-c',
        '--city',
        required=True,
        action='store',
        dest='city',
        help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-s',
        '--state',
        required=True,
        action='store',
        dest='state',
        help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-r',
        '--radius',
        required=True,
        action='store',
        dest='radius',
        help=
        'Radius to search Twitter API for user ids (miles or kilometers -- ex: 50mi or 50km). REQUIRED'
    )
    search_parser.add_argument(
        '-f',
        '--filename',
        required=True,
        action='store',
        dest='filename',
        help='Name of output file for networkx graph data. REQUIRED')

    netx_parser = subparsers.add_parser(
        'netx', help='Perform operations on already generated networkx graph')
    netx_parser.add_argument('-q',
                             '--clique',
                             action='store_true',
                             help='Find cliques with networkx')
    netx_parser.add_argument(
        '-x',
        '--clq_filename',
        action='store',
        help='Provide a filename for the serialized output of find_cliques')
    netx_parser.add_argument('-g',
                             '--graph_filename',
                             required=True,
                             action='store',
                             dest='graph_filename',
                             help='Networkx input data filename. REQUIRED')
    netx_parser.add_argument('-o',
                             '--out_filename',
                             required=True,
                             action='store',
                             dest='out_filename',
                             help='Networkx output data filename. REQUIRED')
    netx_parser.add_argument('-k',
                             '--comm',
                             action='store_true',
                             help='Find communities with networkx')
    netx_parser.add_argument('-p',
                             '--print_graph',
                             action='store_true',
                             help='Print networkx graph')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if not args.mode:
        print('ERROR: No arguments provided. Use -h or --help for help')
        return

    if args.mode == 'search':
        city = args.city
        state = args.state
        search_radius = args.radius
        search_filename = args.filename + '.json'

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(city, state, returns=50)

        user_ids = []
        user_followers = []
        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes),
                                  track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id=str(zipcode.zipcode) + '\t')
            latitude = zipcode.lat
            longitude = zipcode.lng
            user_ids.extend(
                get_user_ids(twpy_api, latitude, longitude, search_radius))

        # follow the retrieved user ids' follower relationships to a depth of n
        n = 2
        for _ in range(n):
            user_ids, user_followers = get_user_followers(
                twpy_api, set(user_ids))

        filename = os.path.join(search_dir, search_filename)
        save_user_follower_networkx_graph(user_followers, filename)

    if args.mode == 'netx':
        graph_filename = os.path.join(search_dir,
                                      args.graph_filename + '.json')
        output_filename = os.path.join(search_dir, args.out_filename + '.json')
        graph = open_nx_graph(graph_filename)
        cliques = []

        if args.clique:
            for clique in pool.map(gather_cliques, nx.find_cliques(graph)):
                cliques.append([int(member) for member in clique])

            with open(output_filename, 'w') as output:
                for clique in cliques:
                    output.write('%s,\n' % (clique))

        elif args.comm:
            if args.clq_filename:
                clique_filename = os.path.join(search_dir,
                                               args.clq_filename + '.json')
                # load the clique topology file
                with open(clique_filename, 'r') as find_cliques_file:
                    cliques = [
                        clique for line in find_cliques_file
                        for clique in ast.literal_eval(line)
                    ]

            with open(output_filename, "w") as output:
                for node in pool.map(gather_cliques,
                                     community.girvan_newman(graph)):
                    print(node)
                    #output.write(str([int(item) for item in node]) + ', \n')
        elif args.print_graph:
            nx.draw(graph)
            plt.show()

    print("Job complete")
def main():
    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description=
        'Get Twitter user ids and their follower ids using Tweepy and save in different formats'
    )
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search', help='Gather Twitter user ids by city, state and radius')
    search_parser.add_argument(
        '-c',
        '--city',
        required=True,
        action='store',
        dest='city',
        help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-s',
        '--state',
        required=True,
        action='store',
        dest='state',
        help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-r',
        '--radius',
        required=True,
        action='store',
        dest='radius',
        help=
        'Radius to search Twitter API for user ids (miles or kilometers -- ex: 50mi or 50km). REQUIRED'
    )
    search_parser.add_argument(
        '-d',
        '--depth',
        required=True,
        action='store',
        dest='depth',
        help=
        'How many levels of follower relationships to traverse when gathering users. REQUIRED'
    )
    search_parser.add_argument(
        '-f',
        '--filename',
        required=True,
        action='store',
        dest='filename',
        help='Name of output file to store gathered users in. REQUIRED')
    search_parser.add_argument(
        '-z',
        '--creds',
        required=True,
        action='store',
        dest='creds',
        help='Path to Twitter developer access credentials. REQUIRED')

    continue_parser = subparsers.add_parser(
        'getfws',
        help=
        'Takes an already gathered JSON list of user ids and retrieves their followers'
    )
    continue_parser.add_argument(
        '-f',
        '--filename',
        action='store',
        help=
        'Filename of the previously saved Twitter user ids in .json format')
    continue_parser.add_argument(
        '-d',
        '--depth',
        required=True,
        action='store',
        dest='depth',
        help=
        'How many levels of follower relationships to traverse when searching for followers. REQUIRED'
    )
    continue_parser.add_argument(
        '-z',
        '--creds',
        required=True,
        action='store',
        dest='creds',
        help='Path to Twitter developer access credentials. REQUIRED')

    convert_parser = subparsers.add_parser(
        'convert',
        help=
        'Convert a user-followers dict to a list of users and save it. This is the file format used when continuing the get-followers function and by get_community_tweets.py'
    )
    convert_parser.add_argument(
        '-i',
        '--input_file',
        action='store',
        help='Filename of the previously saved followers dictionary')
    convert_parser.add_argument(
        '-o',
        '--out_file',
        action='store',
        help=
        'Filename for the output. Provide just the filename, no path; the output file is saved in the same folder as the input file'
    )

    netx_parser = subparsers.add_parser(
        'netx', help='Create cliques or communities from user follower data')
    group = netx_parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-q',
                       '--gen_cliques',
                       required=False,
                       action='store_true',
                       dest='gen_cliques',
                       help='Generate cliques from user followers dictionary')
    group.add_argument(
        '-c',
        '--gen_comms',
        required=False,
        action='store_true',
        dest='gen_comms',
        help='Generate communities from user followers dictionary')
    netx_parser.add_argument(
        '-n',
        '--min_size',
        action='store',
        dest='min_size',
        nargs='?',
        type=int,
        const=1,
        default=4,
        help='Constraint for min size of clique or community (default is 4)')
    netx_parser.add_argument('-i',
                             '--in_filename',
                             required=True,
                             action='store',
                             dest='in_filename',
                             help='User followers dictionary file REQUIRED')
    netx_parser.add_argument('-o',
                             '--out_filename',
                             required=True,
                             action='store',
                             dest='out_filename',
                             help='Output topology filename REQUIRED')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if not args.mode:
        print('ERROR: No arguments provided. Use -h or --help for help')
        return

    if args.mode == 'convert':
        working_dir = get_directory_of_file(args.input_file)
        convert_followers_to_users(args.input_file, args.out_file, working_dir)

    if args.mode == 'getfws':
        twpy_api = auth.get_access_creds(args.creds)

        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return

        working_dir = get_directory_of_file(args.filename)

        user_ids = read_json(args.filename)
        if not user_ids:
            print('Error: No users found in provided file')
            return

        # follow the user ids' follower relationships to the requested depth
        collect_user_followers(args.depth, twpy_api, working_dir,
                               args.filename, user_ids)

    if args.mode == 'search':
        twpy_api = auth.get_access_creds(args.creds)

        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return

        working_dir = get_directory_of_file(args.filename)

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(args.city,
                                                args.state,
                                                returns=50)

        user_ids = []
        user_followers = []
        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes),
                                  track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id='zip code:' + str(zipcode.zipcode) + '\t')
            user_ids.extend(
                get_user_ids(twpy_api, zipcode.lat, zipcode.lng, args.radius))
            write_json(args.filename, list(set(user_ids)))

    if args.mode == 'netx':
        user_followers = read_json(args.in_filename)
        pythonify_dict(user_followers)
        print("Number of followers: " + str(len(user_followers)))
        output_filename = args.out_filename + '.json'
        graph = build_netx_graph(user_followers)

        if args.gen_cliques:
            generate_cliques(graph, output_filename, args.min_size)
        if args.gen_comms:
            generate_communities(graph, output_filename, args.min_size)
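
# Hedged sketch: pythonify_dict and build_netx_graph are referenced above but defined
# elsewhere. The versions below assume the input JSON maps user ids (strings, because of
# JSON encoding) to lists of follower ids, and that the downstream clique/community code
# expects integer node labels on an undirected graph; the real helpers may differ.
import networkx as nx


def pythonify_dict(user_followers):
    # JSON object keys are always strings; convert keys and follower ids back to
    # ints, modifying the dict in place (matching how the caller uses it).
    for key in list(user_followers):
        user_followers[int(key)] = [int(f) for f in user_followers.pop(key)]


def build_netx_graph(user_followers):
    # Undirected graph with an edge between each user and each of their followers,
    # which is what clique and community generation operate on.
    graph = nx.Graph()
    for user, followers in user_followers.items():
        graph.add_edges_from((user, follower) for follower in followers)
    return graph
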
def main():
    # The output of this script is two JSON files saved inside the downloaded-tweets directory:
    # one holds all the active users and the other holds all the inactive users from the topology.
    # User activity is determined by status count and tweet availability (public vs. private).

    # The script can be stopped and restarted mid-run without losing progress.
    parser = argparse.ArgumentParser(
        description=
        'Get tweets of all twitter user ids in the provided topology file')
    parser.add_argument('-f',
                        '--users_file',
                        required=True,
                        action='store',
                        dest='users_file',
                        help='Location of file with user ids')
    parser.add_argument(
        '-c',
        '--dev_creds',
        required=True,
        action='store',
        dest='dev_creds',
        help='Location of file containing Twitter developer access credentials'
    )
    parser.add_argument(
        '-o',
        '--output_dir',
        required=True,
        action='store',
        dest='output_dir',
        help='Name of the directory you want to download Tweets to')
    parser.add_argument(
        '-n',
        '--num_tweets',
        action='store',
        dest='num_tweets',
        nargs='?',
        type=int,
        const=1,
        default=3200,
        help='Number of tweets to download from each user (default is 3200)')
    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    tweets_dir = args.output_dir

    # create the directory for storing tweets (output_dir may or may not end in a slash)
    if not os.path.exists(tweets_dir):
        os.makedirs(tweets_dir, 0o755)

    inactive_users = read_json(os.path.join(tweets_dir, 'inactive_users.json'))
    active_users = read_json(os.path.join(tweets_dir, 'active_users.json'))
    twpy_api = auth.get_access_creds(args.dev_creds)

    if not twpy_api:
        print('Error: Twitter developer access credentials denied')
        return

    # open the list of user ids; the file should already contain a non-repeating set
    comm_set = set(read_json(args.users_file))

    # download tweets for every single user in the set
    # separate active users from inactive users based on status count and availability
    bar = pyprind.ProgPercent(len(comm_set),
                              track_time=True,
                              title='Downloading Tweets')
    while comm_set:
        user = comm_set.pop()
        bar.update(item_id=str(user) + '\t')

        if str(user) in inactive_users or str(user) in active_users:
            continue

        # skip user if they don't exist or are inactive
        status_count = user_status_count(user, twpy_api)
        if status_count <= 10:
            inactive_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        # skip user if their tweets have already been downloaded
        if os.path.exists(os.path.join(tweets_dir, str(user))):
            active_users[str(user)] = status_count
            write_json(tweets_dir, active_users, inactive_users)
            continue

        tweets = get_tweets(user, twpy_api, args.num_tweets)

        if tweets:
            tweet_filename = os.path.join(tweets_dir, str(user))
            write_tweets(tweets, tweet_filename)
            active_users[str(user)] = status_count
        else:
            inactive_users[str(user)] = 0

        write_json(tweets_dir, active_users, inactive_users)
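
# Hedged sketch: user_status_count, get_tweets, and write_tweets are used throughout this
# file but defined elsewhere. The versions below show one way they could look with
# tweepy 3.x; the error handling, the 200-per-page timeline paging, and the on-disk tweet
# layout are assumptions rather than the project's actual implementation.
import json

import tweepy


def user_status_count(user, twpy_api):
    # Return the user's public status count, or 0 if the account is missing,
    # suspended, or protected (the callers treat a low count as "inactive").
    try:
        profile = twpy_api.get_user(user_id=user)
    except tweepy.TweepError:
        return 0
    return 0 if profile.protected else profile.statuses_count


def get_tweets(user, twpy_api, num_tweets=3200):
    # Page through the user's timeline and collect up to num_tweets raw tweet dicts.
    try:
        cursor = tweepy.Cursor(twpy_api.user_timeline, user_id=user, count=200)
        return [status._json for status in cursor.items(num_tweets)]
    except tweepy.TweepError:
        return []


def write_tweets(tweets, tweet_filename):
    # One JSON file per user, holding the list of collected tweets.
    with open(tweet_filename, 'w') as fp:
        json.dump(tweets, fp)
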